diff options
author | Brian Fraser <fraserbn@gmail.com> | 2012-01-12 17:22:05 -0300 |
---|---|---|
committer | Karl Williamson <public@khwilliamson.com> | 2012-01-29 10:07:40 -0700 |
commit | 838f2281125c4e0f98e5d741f9058f09c8242d33 (patch) | |
tree | 6fa3458446be72105180d95d66cf87a1d3b7cf3b | |
parent | 2a4315f8fb099a3fd3bbd5d9994af3919a6c5b05 (diff) | |
download | perl-838f2281125c4e0f98e5d741f9058f09c8242d33.tar.gz |
Implement the fc keyword and the \F string escape.
Along with the simple_casefolding and full_casefolding features.
fc() stands for foldcase, a sort of pseudo case (like lowercase),
which is used to implement Unicode casefolding. It maps a string
to a form where all case differences are erased, so it's a
locale-independent way of checking if two strings are the same,
regardless of case.
This functionality was, and still is, available through the
regular expression engine -- /i matches would use casefolding
internally. The fc keyword merely exposes this for easier access.
Previously, one could attempt to case-insensitively test two strings
for equality by doing
lc($a) eq lc($b)
But that might get you wrong results, for example in the case of
\x{DF}, LATIN SMALL LETTER SHARP S.
-rw-r--r-- | dist/B-Deparse/Deparse.pm | 10 | ||||
-rw-r--r-- | dist/B-Deparse/t/core.t | 4 | ||||
-rw-r--r-- | dist/B-Deparse/t/deparse.t | 1 | ||||
-rw-r--r-- | ext/Opcode/Opcode.pm | 4 | ||||
-rw-r--r-- | keywords.c | 12 | ||||
-rw-r--r-- | keywords.h | 385 | ||||
-rw-r--r-- | opcode.h | 5 | ||||
-rw-r--r-- | opnames.h | 3 | ||||
-rw-r--r-- | pp.c | 151 | ||||
-rw-r--r-- | pp_proto.h | 1 | ||||
-rwxr-xr-x | regen/keywords.pl | 3 | ||||
-rw-r--r-- | regen/opcodes | 3 | ||||
-rw-r--r-- | t/op/coreamp.t | 10 | ||||
-rw-r--r-- | t/op/cproto.t | 3 | ||||
-rw-r--r-- | t/op/lc.t | 72 | ||||
-rw-r--r-- | t/op/taint.t | 15 | ||||
-rw-r--r-- | t/uni/fold.t | 331 | ||||
-rw-r--r-- | toke.c | 16 |
18 files changed, 815 insertions, 214 deletions
diff --git a/dist/B-Deparse/Deparse.pm b/dist/B-Deparse/Deparse.pm index 296be02f84..054b919e38 100644 --- a/dist/B-Deparse/Deparse.pm +++ b/dist/B-Deparse/Deparse.pm @@ -20,7 +20,7 @@ use B qw(class main_root main_start main_cv svref_2object opnumber perlstring CVf_METHOD CVf_LVALUE PMf_KEEP PMf_GLOBAL PMf_CONTINUE PMf_EVAL PMf_ONCE PMf_MULTILINE PMf_SINGLELINE PMf_FOLD PMf_EXTENDED); -$VERSION = "1.11"; +$VERSION = "1.12"; use strict; use vars qw/$AUTOLOAD/; use warnings (); @@ -1674,6 +1674,7 @@ my %feature_keywords = ( break => 'switch', evalbytes=>'evalbytes', __SUB__ => '__SUB__', + fc => 'fc', ); sub keyword { @@ -2147,6 +2148,7 @@ sub pp_lcfirst { dq_unop(@_, "lcfirst") } sub pp_uc { dq_unop(@_, "uc") } sub pp_lc { dq_unop(@_, "lc") } sub pp_quotemeta { maybe_targmy(@_, \&dq_unop, "quotemeta") } +sub pp_fc { dq_unop(@_, "fc") } sub loopex { my $self = shift; @@ -4116,6 +4118,8 @@ sub dq { return '\l' . $self->dq($op->first->sibling); } elsif ($type eq "quotemeta") { return '\Q' . $self->dq($op->first->sibling) . '\E'; + } elsif ($type eq "fc") { + return '\F' . $self->dq($op->first->sibling) . '\E'; } elsif ($type eq "join") { return $self->deparse($op->last, 26); # was join($", @ary) } else { @@ -4437,6 +4441,8 @@ sub re_dq { return '\l' . $self->re_dq($op->first->sibling, $extended); } elsif ($type eq "quotemeta") { return '\Q' . $self->re_dq($op->first->sibling, $extended) . '\E'; + } elsif ($type eq "fc") { + return '\F' . $self->re_dq($op->first->sibling, $extended) . '\E'; } elsif ($type eq "join") { return $self->deparse($op->last, 26); # was join($", @ary) } else { @@ -4454,7 +4460,7 @@ sub pure_string { if ($type eq 'const' || $type eq 'av2arylen') { return 1; } - elsif ($type =~ /^[ul]c(first)?$/ || $type eq 'quotemeta') { + elsif ($type =~ /^(?:[ul]c(first)?|fc)$/ || $type eq 'quotemeta') { return $self->pure_string($op->first->sibling); } elsif ($type eq 'join') { diff --git a/dist/B-Deparse/t/core.t b/dist/B-Deparse/t/core.t index f5952af89c..62ff862e04 100644 --- a/dist/B-Deparse/t/core.t +++ b/dist/B-Deparse/t/core.t @@ -21,7 +21,7 @@ my @nary = ( [qw( abs alarm break chr cos chop close chdir chomp chmod chown chroot caller continue die dump exp exit exec endgrent endpwent endnetent endhostent endservent - endprotoent evalbytes fork glob + endprotoent evalbytes fc fork glob getppid getpwent getprotoent gethostent getnetent getservent getgrent getlogin getc gmtime hex int lc log lstat length lcfirst localtime mkdir ord oct pop quotemeta ref rand @@ -31,7 +31,7 @@ my @nary = ( # unary [qw( abs alarm bless binmode chr cos chop close chdir chomp chmod chown chroot closedir die do dump exp exit exec - each evalbytes fileno getpgrp getpwnam getpwuid getpeername + each evalbytes fc fileno getpgrp getpwnam getpwuid getpeername getprotobyname getprotobynumber gethostbyname getnetbyname getsockname getgrnam getgrgid getc glob gmtime hex int join keys kill lc diff --git a/dist/B-Deparse/t/deparse.t b/dist/B-Deparse/t/deparse.t index 2831981de7..cae808c1d0 100644 --- a/dist/B-Deparse/t/deparse.t +++ b/dist/B-Deparse/t/deparse.t @@ -870,6 +870,7 @@ CORE::given ($x) { } CORE::evalbytes ''; () = CORE::__SUB__; +() = CORE::fc $x; #### # feature features when feature has been disabled by use VERSION use feature (sprintf(":%vd", $^V)); diff --git a/ext/Opcode/Opcode.pm b/ext/Opcode/Opcode.pm index 91a7206ec7..a9d5ab2151 100644 --- a/ext/Opcode/Opcode.pm +++ b/ext/Opcode/Opcode.pm @@ -6,7 +6,7 @@ use strict; our($VERSION, @ISA, @EXPORT_OK); -$VERSION = "1.22"; +$VERSION = "1.23"; use Carp; use Exporter (); @@ -325,7 +325,7 @@ invert_opset function. substr vec stringify study pos length index rindex ord chr - ucfirst lcfirst uc lc quotemeta trans transr chop schop chomp schomp + ucfirst lcfirst uc lc fc quotemeta trans transr chop schop chomp schomp match split qr diff --git a/keywords.c b/keywords.c index 169a41c411..23d550df83 100644 --- a/keywords.c +++ b/keywords.c @@ -51,7 +51,7 @@ Perl_keyword (pTHX_ const char *name, I32 len, bool all_keywords) goto unknown; } - case 2: /* 18 tokens of length 2 */ + case 2: /* 19 tokens of length 2 */ switch (name[0]) { case 'd': @@ -70,6 +70,14 @@ Perl_keyword (pTHX_ const char *name, I32 len, bool all_keywords) goto unknown; + case 'f': + if (name[1] == 'c') + { /* fc */ + return (all_keywords || FEATURE_FC_IS_ENABLED ? -KEY_fc : 0); + } + + goto unknown; + case 'g': switch (name[1]) { @@ -3441,5 +3449,5 @@ unknown: } /* Generated from: - * 29732a698b229f9e5f475fbb191f71c335c9e8d05b6168fe29e61c34c4f10bd2 regen/keywords.pl + * e5a540774760ea54c761ef17ee4a153cc264e9a700b817d561e390730c457406 regen/keywords.pl * ex: set ro: */ diff --git a/keywords.h b/keywords.h index ea200fc31c..5ebde874ff 100644 --- a/keywords.h +++ b/keywords.h @@ -77,198 +77,199 @@ #define KEY_exists 61 #define KEY_exit 62 #define KEY_exp 63 -#define KEY_fcntl 64 -#define KEY_fileno 65 -#define KEY_flock 66 -#define KEY_for 67 -#define KEY_foreach 68 -#define KEY_fork 69 -#define KEY_format 70 -#define KEY_formline 71 -#define KEY_ge 72 -#define KEY_getc 73 -#define KEY_getgrent 74 -#define KEY_getgrgid 75 -#define KEY_getgrnam 76 -#define KEY_gethostbyaddr 77 -#define KEY_gethostbyname 78 -#define KEY_gethostent 79 -#define KEY_getlogin 80 -#define KEY_getnetbyaddr 81 -#define KEY_getnetbyname 82 -#define KEY_getnetent 83 -#define KEY_getpeername 84 -#define KEY_getpgrp 85 -#define KEY_getppid 86 -#define KEY_getpriority 87 -#define KEY_getprotobyname 88 -#define KEY_getprotobynumber 89 -#define KEY_getprotoent 90 -#define KEY_getpwent 91 -#define KEY_getpwnam 92 -#define KEY_getpwuid 93 -#define KEY_getservbyname 94 -#define KEY_getservbyport 95 -#define KEY_getservent 96 -#define KEY_getsockname 97 -#define KEY_getsockopt 98 -#define KEY_given 99 -#define KEY_glob 100 -#define KEY_gmtime 101 -#define KEY_goto 102 -#define KEY_grep 103 -#define KEY_gt 104 -#define KEY_hex 105 -#define KEY_if 106 -#define KEY_index 107 -#define KEY_int 108 -#define KEY_ioctl 109 -#define KEY_join 110 -#define KEY_keys 111 -#define KEY_kill 112 -#define KEY_last 113 -#define KEY_lc 114 -#define KEY_lcfirst 115 -#define KEY_le 116 -#define KEY_length 117 -#define KEY_link 118 -#define KEY_listen 119 -#define KEY_local 120 -#define KEY_localtime 121 -#define KEY_lock 122 -#define KEY_log 123 -#define KEY_lstat 124 -#define KEY_lt 125 -#define KEY_m 126 -#define KEY_map 127 -#define KEY_mkdir 128 -#define KEY_msgctl 129 -#define KEY_msgget 130 -#define KEY_msgrcv 131 -#define KEY_msgsnd 132 -#define KEY_my 133 -#define KEY_ne 134 -#define KEY_next 135 -#define KEY_no 136 -#define KEY_not 137 -#define KEY_oct 138 -#define KEY_open 139 -#define KEY_opendir 140 -#define KEY_or 141 -#define KEY_ord 142 -#define KEY_our 143 -#define KEY_pack 144 -#define KEY_package 145 -#define KEY_pipe 146 -#define KEY_pop 147 -#define KEY_pos 148 -#define KEY_print 149 -#define KEY_printf 150 -#define KEY_prototype 151 -#define KEY_push 152 -#define KEY_q 153 -#define KEY_qq 154 -#define KEY_qr 155 -#define KEY_quotemeta 156 -#define KEY_qw 157 -#define KEY_qx 158 -#define KEY_rand 159 -#define KEY_read 160 -#define KEY_readdir 161 -#define KEY_readline 162 -#define KEY_readlink 163 -#define KEY_readpipe 164 -#define KEY_recv 165 -#define KEY_redo 166 -#define KEY_ref 167 -#define KEY_rename 168 -#define KEY_require 169 -#define KEY_reset 170 -#define KEY_return 171 -#define KEY_reverse 172 -#define KEY_rewinddir 173 -#define KEY_rindex 174 -#define KEY_rmdir 175 -#define KEY_s 176 -#define KEY_say 177 -#define KEY_scalar 178 -#define KEY_seek 179 -#define KEY_seekdir 180 -#define KEY_select 181 -#define KEY_semctl 182 -#define KEY_semget 183 -#define KEY_semop 184 -#define KEY_send 185 -#define KEY_setgrent 186 -#define KEY_sethostent 187 -#define KEY_setnetent 188 -#define KEY_setpgrp 189 -#define KEY_setpriority 190 -#define KEY_setprotoent 191 -#define KEY_setpwent 192 -#define KEY_setservent 193 -#define KEY_setsockopt 194 -#define KEY_shift 195 -#define KEY_shmctl 196 -#define KEY_shmget 197 -#define KEY_shmread 198 -#define KEY_shmwrite 199 -#define KEY_shutdown 200 -#define KEY_sin 201 -#define KEY_sleep 202 -#define KEY_socket 203 -#define KEY_socketpair 204 -#define KEY_sort 205 -#define KEY_splice 206 -#define KEY_split 207 -#define KEY_sprintf 208 -#define KEY_sqrt 209 -#define KEY_srand 210 -#define KEY_stat 211 -#define KEY_state 212 -#define KEY_study 213 -#define KEY_sub 214 -#define KEY_substr 215 -#define KEY_symlink 216 -#define KEY_syscall 217 -#define KEY_sysopen 218 -#define KEY_sysread 219 -#define KEY_sysseek 220 -#define KEY_system 221 -#define KEY_syswrite 222 -#define KEY_tell 223 -#define KEY_telldir 224 -#define KEY_tie 225 -#define KEY_tied 226 -#define KEY_time 227 -#define KEY_times 228 -#define KEY_tr 229 -#define KEY_truncate 230 -#define KEY_uc 231 -#define KEY_ucfirst 232 -#define KEY_umask 233 -#define KEY_undef 234 -#define KEY_unless 235 -#define KEY_unlink 236 -#define KEY_unpack 237 -#define KEY_unshift 238 -#define KEY_untie 239 -#define KEY_until 240 -#define KEY_use 241 -#define KEY_utime 242 -#define KEY_values 243 -#define KEY_vec 244 -#define KEY_wait 245 -#define KEY_waitpid 246 -#define KEY_wantarray 247 -#define KEY_warn 248 -#define KEY_when 249 -#define KEY_while 250 -#define KEY_write 251 -#define KEY_x 252 -#define KEY_xor 253 -#define KEY_y 254 +#define KEY_fc 64 +#define KEY_fcntl 65 +#define KEY_fileno 66 +#define KEY_flock 67 +#define KEY_for 68 +#define KEY_foreach 69 +#define KEY_fork 70 +#define KEY_format 71 +#define KEY_formline 72 +#define KEY_ge 73 +#define KEY_getc 74 +#define KEY_getgrent 75 +#define KEY_getgrgid 76 +#define KEY_getgrnam 77 +#define KEY_gethostbyaddr 78 +#define KEY_gethostbyname 79 +#define KEY_gethostent 80 +#define KEY_getlogin 81 +#define KEY_getnetbyaddr 82 +#define KEY_getnetbyname 83 +#define KEY_getnetent 84 +#define KEY_getpeername 85 +#define KEY_getpgrp 86 +#define KEY_getppid 87 +#define KEY_getpriority 88 +#define KEY_getprotobyname 89 +#define KEY_getprotobynumber 90 +#define KEY_getprotoent 91 +#define KEY_getpwent 92 +#define KEY_getpwnam 93 +#define KEY_getpwuid 94 +#define KEY_getservbyname 95 +#define KEY_getservbyport 96 +#define KEY_getservent 97 +#define KEY_getsockname 98 +#define KEY_getsockopt 99 +#define KEY_given 100 +#define KEY_glob 101 +#define KEY_gmtime 102 +#define KEY_goto 103 +#define KEY_grep 104 +#define KEY_gt 105 +#define KEY_hex 106 +#define KEY_if 107 +#define KEY_index 108 +#define KEY_int 109 +#define KEY_ioctl 110 +#define KEY_join 111 +#define KEY_keys 112 +#define KEY_kill 113 +#define KEY_last 114 +#define KEY_lc 115 +#define KEY_lcfirst 116 +#define KEY_le 117 +#define KEY_length 118 +#define KEY_link 119 +#define KEY_listen 120 +#define KEY_local 121 +#define KEY_localtime 122 +#define KEY_lock 123 +#define KEY_log 124 +#define KEY_lstat 125 +#define KEY_lt 126 +#define KEY_m 127 +#define KEY_map 128 +#define KEY_mkdir 129 +#define KEY_msgctl 130 +#define KEY_msgget 131 +#define KEY_msgrcv 132 +#define KEY_msgsnd 133 +#define KEY_my 134 +#define KEY_ne 135 +#define KEY_next 136 +#define KEY_no 137 +#define KEY_not 138 +#define KEY_oct 139 +#define KEY_open 140 +#define KEY_opendir 141 +#define KEY_or 142 +#define KEY_ord 143 +#define KEY_our 144 +#define KEY_pack 145 +#define KEY_package 146 +#define KEY_pipe 147 +#define KEY_pop 148 +#define KEY_pos 149 +#define KEY_print 150 +#define KEY_printf 151 +#define KEY_prototype 152 +#define KEY_push 153 +#define KEY_q 154 +#define KEY_qq 155 +#define KEY_qr 156 +#define KEY_quotemeta 157 +#define KEY_qw 158 +#define KEY_qx 159 +#define KEY_rand 160 +#define KEY_read 161 +#define KEY_readdir 162 +#define KEY_readline 163 +#define KEY_readlink 164 +#define KEY_readpipe 165 +#define KEY_recv 166 +#define KEY_redo 167 +#define KEY_ref 168 +#define KEY_rename 169 +#define KEY_require 170 +#define KEY_reset 171 +#define KEY_return 172 +#define KEY_reverse 173 +#define KEY_rewinddir 174 +#define KEY_rindex 175 +#define KEY_rmdir 176 +#define KEY_s 177 +#define KEY_say 178 +#define KEY_scalar 179 +#define KEY_seek 180 +#define KEY_seekdir 181 +#define KEY_select 182 +#define KEY_semctl 183 +#define KEY_semget 184 +#define KEY_semop 185 +#define KEY_send 186 +#define KEY_setgrent 187 +#define KEY_sethostent 188 +#define KEY_setnetent 189 +#define KEY_setpgrp 190 +#define KEY_setpriority 191 +#define KEY_setprotoent 192 +#define KEY_setpwent 193 +#define KEY_setservent 194 +#define KEY_setsockopt 195 +#define KEY_shift 196 +#define KEY_shmctl 197 +#define KEY_shmget 198 +#define KEY_shmread 199 +#define KEY_shmwrite 200 +#define KEY_shutdown 201 +#define KEY_sin 202 +#define KEY_sleep 203 +#define KEY_socket 204 +#define KEY_socketpair 205 +#define KEY_sort 206 +#define KEY_splice 207 +#define KEY_split 208 +#define KEY_sprintf 209 +#define KEY_sqrt 210 +#define KEY_srand 211 +#define KEY_stat 212 +#define KEY_state 213 +#define KEY_study 214 +#define KEY_sub 215 +#define KEY_substr 216 +#define KEY_symlink 217 +#define KEY_syscall 218 +#define KEY_sysopen 219 +#define KEY_sysread 220 +#define KEY_sysseek 221 +#define KEY_system 222 +#define KEY_syswrite 223 +#define KEY_tell 224 +#define KEY_telldir 225 +#define KEY_tie 226 +#define KEY_tied 227 +#define KEY_time 228 +#define KEY_times 229 +#define KEY_tr 230 +#define KEY_truncate 231 +#define KEY_uc 232 +#define KEY_ucfirst 233 +#define KEY_umask 234 +#define KEY_undef 235 +#define KEY_unless 236 +#define KEY_unlink 237 +#define KEY_unpack 238 +#define KEY_unshift 239 +#define KEY_untie 240 +#define KEY_until 241 +#define KEY_use 242 +#define KEY_utime 243 +#define KEY_values 244 +#define KEY_vec 245 +#define KEY_wait 246 +#define KEY_waitpid 247 +#define KEY_wantarray 248 +#define KEY_warn 249 +#define KEY_when 250 +#define KEY_while 251 +#define KEY_write 252 +#define KEY_x 253 +#define KEY_xor 254 +#define KEY_y 255 /* Generated from: - * 29732a698b229f9e5f475fbb191f71c335c9e8d05b6168fe29e61c34c4f10bd2 regen/keywords.pl + * e5a540774760ea54c761ef17ee4a153cc264e9a700b817d561e390730c457406 regen/keywords.pl * ex: set ro: */ @@ -521,6 +521,7 @@ EXTCONST char* const PL_op_name[] = { "rvalues", "coreargs", "runcv", + "fc", }; #endif @@ -901,6 +902,7 @@ EXTCONST char* const PL_op_desc[] = { "values on reference", "CORE:: subroutine", "__SUB__", + "fc", }; #endif @@ -1295,6 +1297,7 @@ EXT Perl_ppaddr_t PL_ppaddr[] /* or perlvars.h */ Perl_pp_rvalues, /* implemented by Perl_pp_rkeys */ Perl_pp_coreargs, Perl_pp_runcv, + Perl_pp_fc, } #endif #ifdef PERL_PPADDR_INITED @@ -1686,6 +1689,7 @@ EXT Perl_check_t PL_check[] /* or perlvars.h */ Perl_ck_each, /* rvalues */ Perl_ck_null, /* coreargs */ Perl_ck_null, /* runcv */ + Perl_ck_fun, /* fc */ } #endif #ifdef PERL_CHECK_INITED @@ -2071,6 +2075,7 @@ EXTCONST U32 PL_opargs[] = { 0x00001b08, /* rvalues */ 0x00000600, /* coreargs */ 0x00000004, /* runcv */ + 0x00009b8e, /* fc */ }; #endif @@ -387,10 +387,11 @@ typedef enum opcode { OP_RVALUES = 370, OP_COREARGS = 371, OP_RUNCV = 372, + OP_FC = 373, OP_max } opcode; -#define MAXO 373 +#define MAXO 374 /* the OP_IS_* macros are optimized to a simple range check because all the member OPs are contiguous in regen/opcodes table. @@ -4117,6 +4117,157 @@ PP(pp_quotemeta) RETURN; } +PP(pp_fc) +{ + dVAR; + dTARGET; + dSP; + SV *source = TOPs; + STRLEN len; + STRLEN min; + SV *dest; + const U8 *s; + const U8 *send; + U8 *d; + U8 tmpbuf[UTF8_MAXBYTES * UTF8_MAX_FOLD_CHAR_EXPAND + 1]; + const bool full_folding = TRUE; + const U8 flags = ( full_folding ? FOLD_FLAGS_FULL : 0 ) + | ( IN_LOCALE_RUNTIME ? FOLD_FLAGS_LOCALE : 0 ); + + /* This is a facsimile of pp_lc, but with a thousand bugs thanks to me. + * You are welcome(?) -Hugmeir + */ + + SvGETMAGIC(source); + + dest = TARG; + + if (SvOK(source)) { + s = (const U8*)SvPV_nomg_const(source, len); + } else { + if (ckWARN(WARN_UNINITIALIZED)) + report_uninit(source); + s = (const U8*)""; + len = 0; + } + + min = len + 1; + + SvUPGRADE(dest, SVt_PV); + d = (U8*)SvGROW(dest, min); + (void)SvPOK_only(dest); + + SETs(dest); + + send = s + len; + if (DO_UTF8(source)) { /* UTF-8 flagged string. */ + bool tainted = FALSE; + while (s < send) { + const STRLEN u = UTF8SKIP(s); + STRLEN ulen; + + _to_utf8_fold_flags(s, tmpbuf, &ulen, flags, &tainted); + + if (ulen > u && (SvLEN(dest) < (min += ulen - u))) { + const UV o = d - (U8*)SvPVX_const(dest); + SvGROW(dest, min); + d = (U8*)SvPVX(dest) + o; + } + + Copy(tmpbuf, d, ulen, U8); + d += ulen; + s += u; + } + SvUTF8_on(dest); + if (tainted) { + TAINT; + SvTAINTED_on(dest); + } + } /* Unflagged string */ + else { + /* For locale, bytes, and nothing, the behavior is supposed to be the + * same as lc(). + */ + if ( IN_LOCALE_RUNTIME ) { /* Under locale */ + TAINT; + SvTAINTED_on(dest); + for (; s < send; d++, s++) + *d = toLOWER_LC(*s); + } + else if ( !IN_UNI_8_BIT ) { /* Under nothing, or bytes */ + for (; s < send; d++, s++) + *d = toLOWER(*s); + } + else { + /* For ASCII and the Latin-1 range, there's only two troublesome folds, + * \x{DF} (\N{LATIN SMALL LETTER SHARP S}), which under full casefolding + * becomes 'ss', and \x{B5} (\N{MICRO SIGN}), which under any fold becomes + * \x{3BC} (\N{GREEK SMALL LETTER MU}) -- For the rest, the casefold is + * their lowercase. + */ + for (; s < send; d++, s++) { + if (*s == MICRO_SIGN) { + /* \N{MICRO SIGN}'s casefold is \N{GREEK SMALL LETTER MU}, which + * is outside of the latin-1 range. There's a couple of ways to + * deal with this -- khw discusses them in pp_lc/uc, so go there :) + * What we do here is upgrade what we had already casefolded, + * then enter an inner loop that appends the rest of the characters + * as UTF-8. + */ + len = d - (U8*)SvPVX_const(dest); + SvCUR_set(dest, len); + len = sv_utf8_upgrade_flags_grow(dest, + SV_GMAGIC|SV_FORCE_UTF8_UPGRADE, + (send -s) * UTF8_MAX_FOLD_CHAR_EXPAND + 1); + d = (U8*)SvPVX(dest) + len; + + CAT_UNI_TO_UTF8_TWO_BYTE(d, GREEK_SMALL_LETTER_MU); + s++; + for (; s < send; s++) { + STRLEN ulen; + UV fc = _to_uni_fold_flags(*s, tmpbuf, &ulen, flags); + if UNI_IS_INVARIANT(fc) { + if ( full_folding && *s == LATIN_SMALL_LETTER_SHARP_S) { + *d++ = 's'; + *d++ = 's'; + } + else + *d++ = (U8)fc; + } + else { + Copy(tmpbuf, d, ulen, U8); + d += ulen; + } + } + break; + } + else if (full_folding && *s == LATIN_SMALL_LETTER_SHARP_S) { + /* Under full casefolding, LATIN SMALL LETTER SHARP S becomes "ss", + * which may require growing the SV. + */ + if (SvLEN(dest) < ++min) { + const UV o = d - (U8*)SvPVX_const(dest); + SvGROW(dest, min); + d = (U8*)SvPVX(dest) + o; + } + *(d)++ = 's'; + *d = 's'; + } + else { /* If it's not one of those two, the fold is their lower case */ + *d = toLOWER_LATIN1(*s); + } + } + } + } + *d = '\0'; + SvCUR_set(dest, d - (U8*)SvPVX_const(dest)); + + if (SvTAINTED(source)) + SvTAINT(dest); + SvSETMAGIC(dest); + RETURN; +} + /* Arrays. */ PP(pp_aslice) diff --git a/pp_proto.h b/pp_proto.h index 96bcacbe6b..52011dadd3 100644 --- a/pp_proto.h +++ b/pp_proto.h @@ -66,6 +66,7 @@ PERL_CALLCONV OP *Perl_pp_eq(pTHX); PERL_CALLCONV OP *Perl_pp_exec(pTHX); PERL_CALLCONV OP *Perl_pp_exists(pTHX); PERL_CALLCONV OP *Perl_pp_exit(pTHX); +PERL_CALLCONV OP *Perl_pp_fc(pTHX); PERL_CALLCONV OP *Perl_pp_fileno(pTHX); PERL_CALLCONV OP *Perl_pp_flip(pTHX); PERL_CALLCONV OP *Perl_pp_flock(pTHX); diff --git a/regen/keywords.pl b/regen/keywords.pl index 3e0b0d36f0..b783d08f77 100755 --- a/regen/keywords.pl +++ b/regen/keywords.pl @@ -49,6 +49,8 @@ my %feature_kw = ( evalbytes=>'evalbytes', __SUB__ => '__SUB__', + + fc => 'fc', ); my %pos = map { ($_ => 1) } @{$by_strength{'+'}}; @@ -176,6 +178,7 @@ __END__ +exists -exit -exp +-fc -fcntl -fileno -flock diff --git a/regen/opcodes b/regen/opcodes index 23f6d2852a..22cc9133fd 100644 --- a/regen/opcodes +++ b/regen/opcodes @@ -545,3 +545,6 @@ rvalues values on reference ck_each t% S coreargs CORE:: subroutine ck_null $ runcv __SUB__ ck_null s0 + +# fc and \F +fc fc ck_fun fstu% S? diff --git a/t/op/coreamp.t b/t/op/coreamp.t index 3c0a4a412e..78ced66304 100644 --- a/t/op/coreamp.t +++ b/t/op/coreamp.t @@ -434,6 +434,16 @@ is $^A, ' 1 2', 'effect of &myformline'; lis [&myformline('@')], [1], '&myformline in list context'; test_proto 'exp'; + +test_proto 'fc'; +$tests += 2; +{ + my $sharp_s = "\xdf"; + is &myfc($sharp_s), $sharp_s, '&fc, no unicode_strings'; + use feature 'unicode_strings'; + is &myfc($sharp_s), "ss", '&fc, unicode_strings'; +} + test_proto 'fcntl'; test_proto 'fileno'; diff --git a/t/op/cproto.t b/t/op/cproto.t index dabb4bc4a6..8870df8d08 100644 --- a/t/op/cproto.t +++ b/t/op/cproto.t @@ -7,7 +7,7 @@ BEGIN { } BEGIN { require './test.pl'; } -plan tests => 247; +plan tests => 248; while (<DATA>) { chomp; @@ -83,6 +83,7 @@ exec undef exists undef exit (;$) exp (_) +fc (_) fcntl (*$$) fileno (*) flock (*$) @@ -1,18 +1,39 @@ #!./perl +# This file is intentionally encoded in latin-1. + BEGIN { chdir 't'; @INC = '../lib'; require './test.pl'; } -plan tests => 93; +use feature qw( fc ); + +plan tests => 124; is(lc(undef), "", "lc(undef) is ''"); is(lcfirst(undef), "", "lcfirst(undef) is ''"); is(uc(undef), "", "uc(undef) is ''"); is(ucfirst(undef), "", "ucfirst(undef) is ''"); +{ + no feature 'fc'; + is(CORE::fc(undef), "", "fc(undef) is ''"); + is(CORE::fc(''), "", "fc('') is ''"); + + local $@; + eval { fc("eeyup") }; + like($@, qr/Undefined subroutine &main::fc/, "fc() throws an exception,"); + + { + use feature 'fc'; + local $@; + eval { fc("eeyup") }; + ok(!$@, "...but works after requesting the feature"); + } +} + $a = "HELLO.* world"; $b = "hello.* WORLD"; @@ -21,27 +42,32 @@ is("\u$a" , "HELLO\.\* world", '\u'); is("\l$a" , "hELLO\.\* world", '\l'); is("\U$a" , "HELLO\.\* WORLD", '\U'); is("\L$a" , "hello\.\* world", '\L'); +is("\F$a" , "hello\.\* world", '\F'); is(quotemeta($a) , "HELLO\\.\\*\\ world", 'quotemeta'); is(ucfirst($a) , "HELLO\.\* world", 'ucfirst'); is(lcfirst($a) , "hELLO\.\* world", 'lcfirst'); is(uc($a) , "HELLO\.\* WORLD", 'uc'); is(lc($a) , "hello\.\* world", 'lc'); +is(fc($a) , "hello\.\* world", 'fc'); is("\Q$b\E." , "hello\\.\\*\\ WORLD.", '\Q\E hello.* WORLD'); is("\u$b" , "Hello\.\* WORLD", '\u'); is("\l$b" , "hello\.\* WORLD", '\l'); is("\U$b" , "HELLO\.\* WORLD", '\U'); is("\L$b" , "hello\.\* world", '\L'); +is("\F$b" , "hello\.\* world", '\F'); is(quotemeta($b) , "hello\\.\\*\\ WORLD", 'quotemeta'); is(ucfirst($b) , "Hello\.\* WORLD", 'ucfirst'); is(lcfirst($b) , "hello\.\* WORLD", 'lcfirst'); is(uc($b) , "HELLO\.\* WORLD", 'uc'); is(lc($b) , "hello\.\* world", 'lc'); +is(fc($b) , "hello\.\* world", 'fc'); # \x{100} is LATIN CAPITAL LETTER A WITH MACRON; its bijective lowercase is # \x{101}, LATIN SMALL LETTER A WITH MACRON. +# Which is also its foldcase. $a = "\x{100}\x{101}Aa"; $b = "\x{101}\x{100}aA"; @@ -51,24 +77,28 @@ is("\u$a" , "\x{100}\x{101}Aa", '\u'); is("\l$a" , "\x{101}\x{101}Aa", '\l'); is("\U$a" , "\x{100}\x{100}AA", '\U'); is("\L$a" , "\x{101}\x{101}aa", '\L'); +is("\F$a" , "\x{101}\x{101}aa", '\F'); is(quotemeta($a) , "\x{100}\x{101}Aa", 'quotemeta'); is(ucfirst($a) , "\x{100}\x{101}Aa", 'ucfirst'); is(lcfirst($a) , "\x{101}\x{101}Aa", 'lcfirst'); is(uc($a) , "\x{100}\x{100}AA", 'uc'); is(lc($a) , "\x{101}\x{101}aa", 'lc'); +is(fc($a) , "\x{101}\x{101}aa", 'fc'); is("\Q$b\E." , "\x{101}\x{100}aA.", '\Q\E \x{101}\x{100}aA'); is("\u$b" , "\x{100}\x{100}aA", '\u'); is("\l$b" , "\x{101}\x{100}aA", '\l'); is("\U$b" , "\x{100}\x{100}AA", '\U'); is("\L$b" , "\x{101}\x{101}aa", '\L'); +is("\F$b" , "\x{101}\x{101}aa", '\F'); is(quotemeta($b) , "\x{101}\x{100}aA", 'quotemeta'); is(ucfirst($b) , "\x{100}\x{100}aA", 'ucfirst'); is(lcfirst($b) , "\x{101}\x{100}aA", 'lcfirst'); is(uc($b) , "\x{100}\x{100}AA", 'uc'); is(lc($b) , "\x{101}\x{101}aa", 'lc'); +is(fc($b) , "\x{101}\x{101}aa", 'fc'); # \x{DF} is LATIN SMALL LETTER SHARP S, its uppercase is SS or \x{53}\x{53}; # \x{149} is LATIN SMALL LETTER N PRECEDED BY APOSTROPHE, its uppercase is @@ -83,6 +113,15 @@ is(latin1_to_native("\U\x{DF}aB\x{149}cD"), latin1_to_native("SSAB\x{2BC}NCD"), is(latin1_to_native("\L\x{DF}aB\x{149}cD"), latin1_to_native("\x{DF}ab\x{149}cd"), "multicharacter lowercase"); +# \x{DF} is LATIN SMALL LETTER SHARP S, its foldcase is ss or \x{73}\x{73}; +# \x{149} is LATIN SMALL LETTER N PRECEDED BY APOSTROPHE, its foldcase is +# \x{2BC}\x{6E} or MODIFIER LETTER APOSTROPHE and n. +# Note that is this further tested in t/uni/fold.t + +is(latin1_to_native("\F\x{DF}aB\x{149}cD"), latin1_to_native("ssab\x{2BC}ncd"), + "multicharacter foldcase"); + + # titlecase is used for \u / ucfirst. # \x{587} is ARMENIAN SMALL LIGATURE ECH YIWN and its titlecase is @@ -91,16 +130,18 @@ is(latin1_to_native("\L\x{DF}aB\x{149}cD"), latin1_to_native("\x{DF}ab\x{149}cd" # \x{587} itself # and its uppercase is # \x{535}\x{552} ARMENIAN CAPITAL LETTER ECH + ARMENIAN CAPITAL LETTER YIWN +# The foldcase is \x{565}\x{582} ARMENIAN SMALL LETTER ECH + ARMENIAN SMALL LETTER YIWN $a = "\x{587}"; is("\L\x{587}" , "\x{587}", "ligature lowercase"); is("\u\x{587}" , "\x{535}\x{582}", "ligature titlecase"); is("\U\x{587}" , "\x{535}\x{552}", "ligature uppercase"); +is("\F\x{587}" , "\x{565}\x{582}", "ligature foldcase"); # mktables had problems where many-to-one case mappings didn't work right. # The lib/uni/fold.t should give the fourth folding, "casefolding", a good -# workout (one cannot directly get that from Perl). +# workout. # \x{01C4} is LATIN CAPITAL LETTER DZ WITH CARON # \x{01C5} is LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON # \x{01C6} is LATIN SMALL LETTER DZ WITH CARON @@ -127,6 +168,9 @@ is($c , $a, "Using s///e to change case."); ($c = $a) =~ s/(\p{IsWord}+)/uc($1)/ge; is($c , $b, "Using s///e to change case."); +($c = $a) =~ s/(\p{IsWord}+)/fc($1)/ge; +is($c , $a, "Using s///e to foldcase."); + ($c = $b) =~ s/(\p{IsWord}+)/lcfirst($1)/ge; is($c , "\x{3c3}FOO.bAR", "Using s///e to change case."); @@ -165,6 +209,11 @@ for ("a\x{100}", "yz\x{100}") { # to Ss (different length) is(substr(ucfirst($_), 0), ucfirst($_), "[perl #38619] ucfirst"); } +#fc() didn't exist back then, but coverage is coverage. +for ("a\x{100}", "yz\x{100}", "xyz\x{100}", "XYZ\x{100}") { # to Ss (different length) + is(substr(fc($_), 0), fc($_), "[perl #38619] fc"); +} + # Related to [perl #38619] # the original report concerns PERL_MAGIC_utf8. # these cases concern PERL_MAGIC_regex_global. @@ -187,11 +236,23 @@ for (map { $_ } "A\x{100}", "ABC\x{100}", "\x{100}") { is($result, $expect, "[perl #38619]"); } +for (map { $_ } "A\x{100}", "ABC\x{100}", "\x{100}") { + chop; # get ("A", "ABC", "") in utf8 + my $return = fc($_) =~ /\G(.?)/g; + my $result = $return ? $1 : "not"; + my $expect = (fc($_) =~ /(.?)/g)[0]; + is($return, 1, "[perl #38619]"); + is($result, $expect, "[perl #38619]"); +} + for (1, 4, 9, 16, 25) { is(uc "\x{03B0}" x $_, "\x{3a5}\x{308}\x{301}" x $_, 'uc U+03B0 grows threefold'); is(lc "\x{0130}" x $_, "i\x{307}" x $_, 'lc U+0130 grows'); + + is(fc "\x{03B0}" x $_, "\x{3C5}\x{308}\x{301}" x $_, + 'fc U+03B0 grows threefold'); } # bug #43207 @@ -201,5 +262,12 @@ for ("$temp") { is($_, "Hello"); } +# bug #43207 +my $temp = "Hello"; +for ("$temp") { + fc $_; + is($_, "Hello"); +} + # new in Unicode 5.1.0 is(lc("\x{1E9E}"), "\x{df}", "lc(LATIN CAPITAL LETTER SHARP S)"); diff --git a/t/op/taint.t b/t/op/taint.t index ca52f89178..0b626f340c 100644 --- a/t/op/taint.t +++ b/t/op/taint.t @@ -17,7 +17,7 @@ BEGIN { use strict; use Config; -plan tests => 787; +plan tests => 791; $| = 1; @@ -2176,6 +2176,19 @@ for(1,2) { } pass("no death when TARG of ref is tainted"); +{ + use feature 'fc'; + use locale; + my ($latin1, $utf8) = ("\xDF") x 2; + utf8::downgrade($latin1); + utf8::upgrade($utf8); + + is_tainted fc($latin1), "under locale, lc(latin1) taints the result"; + is_tainted fc($utf8), "under locale, lc(utf8) taints the result"; + + is_tainted "\F$latin1", "under locale, \\Flatin1 taints the result"; + is_tainted "\F$utf8", "under locale, \\Futf8 taints the result"; +} # This may bomb out with the alarm signal so keep it last SKIP: { diff --git a/t/uni/fold.t b/t/uni/fold.t index d481b53142..3dde70469e 100644 --- a/t/uni/fold.t +++ b/t/uni/fold.t @@ -2,6 +2,7 @@ use strict; use warnings; # re/fold_grind.t has more complex tests, but doesn't test every fold +# This file also tests the fc() keyword. BEGIN { chdir 't' if -d 't'; @@ -9,25 +10,27 @@ BEGIN { require './test.pl'; } +use feature 'unicode_strings'; + binmode *STDOUT, ":utf8"; our $TODO; plan("no_plan"); - # Read in the official case folding definitions. my $CF = '../lib/unicore/CaseFolding.txt'; die qq[$0: failed to open "$CF": $!\n] if ! open(my $fh, "<", $CF); my @CF; +my @simple_folds; my %reverse_fold; while (<$fh>) { - # Skip S since we are going for 'F'ull case folding. I is obsolete starting - # with Unicode 3.2, but leaving it in does no harm, and allows backward - # compatibility + # We only use 'S' in simple folded fc(), since the regex engine uses + # 'F'ull case folding. I is obsolete starting with Unicode 3.2, but + # leaving it in does no harm, and allows backward compatibility next unless my ($code, $type, $mapping, $name) = $_ =~ - /^([0-9A-F]+); ([CFI]); ((?:[0-9A-F]+)(?: [0-9A-F]+)*); \# (.+)/; + /^([0-9A-F]+); ([CFIS]); ((?:[0-9A-F]+)(?: [0-9A-F]+)*); \# (.+)/; # Convert any 0-255 range chars to native. $code = sprintf("%04X", ord_latin1_to_native(hex $code)) if hex $code < 0x100; @@ -35,6 +38,11 @@ while (<$fh>) { sprintf("%04X", ord_latin1_to_native(hex $_)) } split / /, $mapping; + if ( $type eq "S" ) { + push @simple_folds, [$code, $mapping, $type, $name]; + next; + } + push @CF, [$code, $mapping, $type, $name]; # Get the inverse fold for single-char mappings. @@ -43,6 +51,19 @@ while (<$fh>) { close($fh) or die "$0 Couldn't close $CF"; +foreach my $test_ref ( @simple_folds ) { + use feature 'fc'; + my ($code, $mapping, $type, $name) = @$test_ref; + my $c = pack("U0U*", hex $code); + my $f = pack("U0U*", map { hex } split " ", $mapping); + + my $against = join "", "qq{", map("\\x{$_}", split " ", $mapping), "}"; + { + isnt(fc($c), $f, "$code - $name - $mapping - $type - Full casefolding, fc(\\x{$code}) ne $against"); + isnt("\F$c", $f, "$code - $name - $mapping - $type - Full casefolding, qq{\\F\\x{$code}} ne $against"); + } +} + foreach my $test_ref (@CF) { my ($code, $mapping, $type, $name) = @$test_ref; my $c = pack("U0U*", hex $code); @@ -60,6 +81,24 @@ foreach my $test_ref (@CF) { ok eval $test, "$code - $name - $mapping - $type - $test"; } + { + # fc() tests + my $against = join "", "qq{", map("\\x{$_}", split " ", $mapping), "}"; + is(CORE::fc($c), $f, + "$code - $name - $mapping - $type - fc(\\x{$code}) eq $against"); + is("\F$c", $f, "$code - $name - $mapping - $type - qq{\\F\\x{$code}} eq $against"); + + # And here we test bytes. For [A-Za-z0-9], the fold is the same as lc under + # bytes. For everything else, it's the bytes that formed the original string. + if ( $c =~ /[A-Za-z0-9]/ ) { + use bytes; + is(CORE::fc($c), lc($c), "$code - $name - fc and use bytes, ascii"); + } else { + my $copy = "" . $c; + utf8::encode($copy); + is($copy, do { use bytes; CORE::fc($c) }, "$code - $name - fc and use bytes"); + } + } # Certain tests weren't convenient to put in the list above since they are # TODO's in multi-character folds. if ($f_length == 1) { @@ -115,6 +154,288 @@ foreach my $test_ref (@CF) { } } +{ + use utf8; + use feature qw(fc); + # These three come from the ICU project's test suite, more especifically + # http://icu.sourcearchive.com/documentation/4.4~rc1-1/strcase_8cpp-source.html + + my $s = "A\N{U+00df}\N{U+00b5}\N{U+fb03}\N{U+1040C}\N{U+0130}\N{U+0131}"; + #\N{LATIN CAPITAL LETTER A}\N{LATIN SMALL LETTER SHARP S}\N{MICRO SIGN}\N{LATIN SMALL LIGATURE FFI}\N{DESERET CAPITAL LETTER AY}\N{LATIN CAPITAL LETTER I WITH DOT ABOVE}\N{LATIN SMALL LETTER DOTLESS I} + + my $f = "ass\N{U+03bc}ffi\N{U+10434}i\N{U+0307}\N{U+0131}"; + #\N{LATIN SMALL LETTER A}\N{LATIN SMALL LETTER S}\N{LATIN SMALL LETTER S}\N{GREEK SMALL LETTER MU}\N{LATIN SMALL LETTER F}\N{LATIN SMALL LETTER F}\N{LATIN SMALL LETTER I}\N{DESERET SMALL LETTER AY}\N{LATIN SMALL LETTER I}\N{COMBINING DOT ABOVE}\N{LATIN SMALL LETTER DOTLESS I} + + is(fc($s), $f, "ICU's casefold test passes"); + is("\F$s", $f, "ICU's casefold test passes"); + + is( fc("aBİIıϐßffi"), "abi̇iıβssffi" ); + is( "\FaBİIıϐßffi", "abi̇iıβssffi" ); +# TODO: { +# local $::TODO = "turkic special cases"; +# is( fc "aBİIıϐßffi", "abiııβssffi" ); +# } + + # The next batch come from http://www.devdaily.com/java/jwarehouse/lucene/contrib/icu/src/test/org/apache/lucene/analysis/icu/TestICUFoldingFilter.java.shtml + # Except the article got most casings wrong. Or maybe Lucene does. + + is( fc("This is a test"), "this is a test" ); + is( fc("Ruß"), "russ" ); + is( fc("ΜΆΪΟΣ"), "μάϊοσ" ); + is( fc("Μάϊος"), "μάϊοσ" ); + is( fc("𐐖"), "𐐾" ); + is( fc("r\xe9sum\xe9"), "r\xe9sum\xe9" ); + is( fc("re\x{0301}sume\x{0301}"), "re\x{301}sume\x{301}" ); + is( fc("ELİF"), "eli\x{307}f" ); + is( fc("eli\x{307}f"), "eli\x{307}f"); + + # This batch comes from + # http://www.java2s.com/Open-Source/Java-Document/Internationalization-Localization/icu4j/com/ibm/icu/dev/test/lang/UCharacterCaseTest.java.htm + # Which uses ICU as the backend. + + my @folding_mixed = ( + "\x{61}\x{42}\x{130}\x{49}\x{131}\x{3d0}\x{df}\x{fb03}", + "A\x{df}\x{b5}\x{fb03}\x{1040C}\x{130}\x{131}", + ); + + my @folding_default = ( + "\x{61}\x{62}\x{69}\x{307}\x{69}\x{131}\x{3b2}\x{73}\x{73}\x{66}\x{66}\x{69}", + "ass\x{3bc}ffi\x{10434}i\x{307}\x{131}", + ); + + my @folding_exclude_turkic = ( + "\x{61}\x{62}\x{69}\x{131}\x{131}\x{3b2}\x{73}\x{73}\x{66}\x{66}\x{69}", + "ass\x{3bc}ffi\x{10434}i\x{131}", + ); + + is( fc($folding_mixed[1]), $folding_default[1] ); + + is( fc($folding_mixed[0]), $folding_default[0] ); + +} + +{ + use utf8; + # Table stolen from tchrist's mail in + # http://bugs.python.org/file23051/casing-tests.py + # and http://98.245.80.27/tcpc/OSCON2011/case-test.python3 + # For reference, it's a longer version of what he posted here: + # http://stackoverflow.com/questions/6991038/case-insensitive-storage-and-unicode-compatibility + + #Couple of repeats because I'm lazy, not tchrist's fault. + + #This should probably go in t/op/lc.t + + my @test_table = ( +# ORIG LC_SIMPLE TC_SIMPLE UC_SIMPLE LC_FULL TC_FULL UC_FULL FC_SIMPLE FC_TURKIC FC_FULL +[ 'þǽr rihtes', 'þǽr rihtes', 'Þǽr Rihtes', 'ÞǼR RIHTES', 'þǽr rihtes', 'Þǽr Rihtes', 'ÞǼR RIHTES', 'þǽr rihtes', 'þǽr rihtes', 'þǽr rihtes', ], +[ 'duȝeðlice', 'duȝeðlice', 'Duȝeðlice', 'DUȜEÐLICE', 'duȝeðlice', 'Duȝeðlice', 'DUȜEÐLICE', 'duȝeðlice', 'duȝeðlice', 'duȝeðlice', ], +[ 'Ævar Arnfjörð Bjarmason', 'ævar arnfjörð bjarmason', 'Ævar Arnfjörð Bjarmason', 'ÆVAR ARNFJÖRÐ BJARMASON', 'ævar arnfjörð bjarmason', 'Ævar Arnfjörð Bjarmason', 'ÆVAR ARNFJÖRÐ BJARMASON', 'ævar arnfjörð bjarmason', 'ævar arnfjörð bjarmason', 'ævar arnfjörð bjarmason', ], +[ 'Кириллица', 'кириллица', 'Кириллица', 'КИРИЛЛИЦА', 'кириллица', 'Кириллица', 'КИРИЛЛИЦА', 'кириллица', 'кириллица', 'кириллица', ], +[ 'ij', 'ij', 'IJ', 'IJ', 'ij', 'IJ', 'IJ', 'ij', 'ij', 'ij', ], +[ 'Van Dijke', 'van dijke', 'Van Dijke', 'VAN DIJKE', 'van dijke', 'Van Dijke', 'VAN DIJKE', 'van dijke', 'van dijke', 'van dijke', ], +[ 'VAN DIJKE', 'van dijke', 'Van Dijke', 'VAN DIJKE', 'van dijke', 'Van Dijke', 'VAN DIJKE', 'van dijke', 'van dijke', 'van dijke', ], +[ 'efficient', 'efficient', 'Efficient', 'EffiCIENT', 'efficient', 'Efficient', 'EFFICIENT', 'efficient', 'efficient', 'efficient', ], +[ 'flour', 'flour', 'flour', 'flOUR', 'flour', 'Flour', 'FLOUR', 'flour', 'flour', 'flour', ], +[ 'flour and water', 'flour and water', 'flour And Water', 'flOUR AND WATER', 'flour and water', 'Flour And Water', 'FLOUR AND WATER', 'flour and water', 'flour and water', 'flour and water', ], +[ 'dzur', 'dzur', 'Dzur', 'DZUR', 'dzur', 'Dzur', 'DZUR', 'dzur', 'dzur', 'dzur', ], +[ 'Dzur', 'dzur', 'Dzur', 'DZUR', 'dzur', 'Dzur', 'DZUR', 'dzur', 'dzur', 'dzur', ], +[ 'DZUR', 'dzur', 'Dzur', 'DZUR', 'dzur', 'Dzur', 'DZUR', 'dzur', 'dzur', 'dzur', ], +[ 'dzur mountain', 'dzur mountain', 'Dzur Mountain', 'DZUR MOUNTAIN', 'dzur mountain', 'Dzur Mountain', 'DZUR MOUNTAIN', 'dzur mountain', 'dzur mountain', 'dzur mountain', ], +[ 'Dzur Mountain', 'dzur mountain', 'Dzur Mountain', 'DZUR MOUNTAIN', 'dzur mountain', 'Dzur Mountain', 'DZUR MOUNTAIN', 'dzur mountain', 'dzur mountain', 'dzur mountain', ], +[ 'DZUR MOUNTAIN', 'dzur mountain', 'Dzur Mountain', 'DZUR MOUNTAIN', 'dzur mountain', 'Dzur Mountain', 'DZUR MOUNTAIN', 'dzur mountain', 'dzur mountaın', 'dzur mountain', ], +[ 'poſt', 'poſt', 'Poſt', 'POST', 'poſt', 'Poſt', 'POST', 'post', 'post', 'post', ], +[ 'poſt', 'poſt', 'Poſt', 'POſt', 'poſt', 'Poſt', 'POST', 'poſt', 'post', 'post', ], +[ 'ſtop', 'ſtop', 'ſtop', 'ſtOP', 'ſtop', 'Stop', 'STOP', 'ſtop', 'stop', 'stop', ], +[ 'tschüß', 'tschüß', 'Tschüß', 'TSCHÜß', 'tschüß', 'Tschüß', 'TSCHÜSS', 'tschüß', 'tschüss', 'tschüss', ], +[ 'TSCHÜẞ', 'tschüß', 'Tschüß', 'TSCHÜẞ', 'tschüß', 'Tschüß', 'TSCHÜẞ', 'tschüß', 'tschüss', 'tschüss', ], +[ 'weiß', 'weiß', 'Weiß', 'WEIß', 'weiß', 'Weiß', 'WEISS', 'weiß', 'weiss', 'weiss', ], +[ 'WEIẞ', 'weiß', 'Weiß', 'WEIẞ', 'weiß', 'Weiß', 'WEIẞ', 'weiß', 'weıss', 'weiss', ], +[ 'ẞIEW', 'ßiew', 'ẞiew', 'ẞIEW', 'ßiew', 'ẞiew', 'ẞIEW', 'ßiew', 'ssıew', 'ssiew', ], +[ 'ᾲ', 'ᾲ', 'Ὰͅ', 'ᾺΙ', 'ᾲ', 'Ὰͅ', 'ᾺΙ', 'ὰι', 'ὰι', 'ὰι', ], +[ 'Ὰι', 'ὰι', 'Ὰι', 'ᾺΙ', 'ὰι', 'Ὰι', 'ᾺΙ', 'ὰι', 'ὰι', 'ὰι', ], +[ 'ᾺΙ', 'ὰι', 'Ὰι', 'ᾺΙ', 'ὰι', 'Ὰι', 'ᾺΙ', 'ὰι', 'ὰι', 'ὰι', ], +[ 'ᾲ', 'ᾲ', 'ᾲ', 'ᾲ', 'ᾲ', 'Ὰͅ', 'ᾺΙ', 'ᾲ', 'ὰι', 'ὰι', ], +[ 'Ὰͅ', 'ᾲ', 'Ὰͅ', 'ᾺΙ', 'ᾲ', 'Ὰͅ', 'ᾺΙ', 'ὰι', 'ὰι', 'ὰι', ], +[ 'ᾺΙ', 'ὰι', 'Ὰι', 'ᾺΙ', 'ὰι', 'Ὰι', 'ᾺΙ', 'ὰι', 'ὰι', 'ὰι', ], +[ 'ᾲ στο διάολο', 'ᾲ στο διάολο', 'ᾲ Στο Διάολο', 'ᾲ ΣΤΟ ΔΙΆΟΛΟ', 'ᾲ στο διάολο', 'Ὰͅ Στο Διάολο', 'ᾺΙ ΣΤΟ ΔΙΆΟΛΟ', 'ᾲ στο διάολο', 'ὰι στο διάολο', 'ὰι στο διάολο', ], +[ 'ᾲ στο διάολο', 'ᾲ στο διάολο', 'Ὰͅ Στο Διάολο', 'ᾺΙ ΣΤΟ ΔΙΆΟΛΟ', 'ᾲ στο διάολο', 'Ὰͅ Στο Διάολο', 'ᾺΙ ΣΤΟ ΔΙΆΟΛΟ', 'ὰι στο διάολο', 'ὰι στο διάολο', 'ὰι στο διάολο', ], +[ '𐐼𐐯𐑅𐐨𐑉𐐯𐐻', '𐐼𐐯𐑅𐐨𐑉𐐯𐐻', '𐐔𐐯𐑅𐐨𐑉𐐯𐐻', '𐐔𐐇𐐝𐐀𐐡𐐇𐐓', '𐐼𐐯𐑅𐐨𐑉𐐯𐐻', '𐐔𐐯𐑅𐐨𐑉𐐯𐐻', '𐐔𐐇𐐝𐐀𐐡𐐇𐐓', '𐐼𐐯𐑅𐐨𐑉𐐯𐐻', '𐐼𐐯𐑅𐐨𐑉𐐯𐐻', '𐐼𐐯𐑅𐐨𐑉𐐯𐐻', ], +[ '𐐔𐐯𐑅𐐨𐑉𐐯𐐻', '𐐼𐐯𐑅𐐨𐑉𐐯𐐻', '𐐔𐐯𐑅𐐨𐑉𐐯𐐻', '𐐔𐐇𐐝𐐀𐐡𐐇𐐓', '𐐼𐐯𐑅𐐨𐑉𐐯𐐻', '𐐔𐐯𐑅𐐨𐑉𐐯𐐻', '𐐔𐐇𐐝𐐀𐐡𐐇𐐓', '𐐼𐐯𐑅𐐨𐑉𐐯𐐻', '𐐼𐐯𐑅𐐨𐑉𐐯𐐻', '𐐼𐐯𐑅𐐨𐑉𐐯𐐻', ], +[ '𐐔𐐇𐐝𐐀𐐡𐐇𐐓', '𐐼𐐯𐑅𐐨𐑉𐐯𐐻', '𐐔𐐯𐑅𐐨𐑉𐐯𐐻', '𐐔𐐇𐐝𐐀𐐡𐐇𐐓', '𐐼𐐯𐑅𐐨𐑉𐐯𐐻', '𐐔𐐯𐑅𐐨𐑉𐐯𐐻', '𐐔𐐇𐐝𐐀𐐡𐐇𐐓', '𐐼𐐯𐑅𐐨𐑉𐐯𐐻', '𐐼𐐯𐑅𐐨𐑉𐐯𐐻', '𐐼𐐯𐑅𐐨𐑉𐐯𐐻', ], +[ 'henry ⅷ', 'henry ⅷ', 'Henry Ⅷ', 'HENRY Ⅷ', 'henry ⅷ', 'Henry Ⅷ', 'HENRY Ⅷ', 'henry ⅷ', 'henry ⅷ', 'henry ⅷ', ], +[ 'Henry Ⅷ', 'henry ⅷ', 'Henry Ⅷ', 'HENRY Ⅷ', 'henry ⅷ', 'Henry Ⅷ', 'HENRY Ⅷ', 'henry ⅷ', 'henry ⅷ', 'henry ⅷ', ], +[ 'HENRY Ⅷ', 'henry ⅷ', 'Henry Ⅷ', 'HENRY Ⅷ', 'henry ⅷ', 'Henry Ⅷ', 'HENRY Ⅷ', 'henry ⅷ', 'henry ⅷ', 'henry ⅷ', ], +[ 'i work at ⓚ', 'i work at ⓚ', 'I Work At Ⓚ', 'I WORK AT Ⓚ', 'i work at ⓚ', 'I Work At Ⓚ', 'I WORK AT Ⓚ', 'i work at ⓚ', 'i work at ⓚ', 'i work at ⓚ', ], +[ 'I Work At Ⓚ', 'i work at ⓚ', 'I Work At Ⓚ', 'I WORK AT Ⓚ', 'i work at ⓚ', 'I Work At Ⓚ', 'I WORK AT Ⓚ', 'i work at ⓚ', 'ı work at ⓚ', 'i work at ⓚ', ], +[ 'I WORK AT Ⓚ', 'i work at ⓚ', 'I Work At Ⓚ', 'I WORK AT Ⓚ', 'i work at ⓚ', 'I Work At Ⓚ', 'I WORK AT Ⓚ', 'i work at ⓚ', 'ı work at ⓚ', 'i work at ⓚ', ], +[ 'istambul', 'istambul', 'Istambul', 'ISTAMBUL', 'istambul', 'Istambul', 'ISTAMBUL', 'istambul', 'istambul', 'istambul', ], +[ 'i̇stanbul', 'i̇stanbul', 'İstanbul', 'İSTANBUL', 'i̇stanbul', 'İstanbul', 'İSTANBUL', 'i̇stanbul', 'i̇stanbul', 'i̇stanbul', ], +[ 'İstanbul', 'i̇stanbul', 'İstanbul', 'İSTANBUL', 'i̇stanbul', 'İstanbul', 'İSTANBUL', 'i̇stanbul', 'ı̇stanbul', 'i̇stanbul', ], +[ 'İSTANBUL', 'istanbul', 'İstanbul', 'İSTANBUL', 'i̇stanbul', 'İstanbul', 'İSTANBUL', 'İstanbul', 'istanbul', 'i̇stanbul', ], +[ 'στιγμας', 'στιγμας', 'Στιγμας', 'ΣΤΙΓΜΑΣ', 'στιγμας', 'Στιγμας', 'ΣΤΙΓΜΑΣ', 'στιγμασ', 'στιγμασ', 'στιγμασ', ], +[ 'στιγμασ', 'στιγμασ', 'Στιγμασ', 'ΣΤΙΓΜΑΣ', 'στιγμασ', 'Στιγμασ', 'ΣΤΙΓΜΑΣ', 'στιγμασ', 'στιγμασ', 'στιγμασ', ], +[ 'ΣΤΙΓΜΑΣ', 'στιγμασ', 'Στιγμασ', 'ΣΤΙΓΜΑΣ', 'στιγμασ', 'Στιγμασ', 'ΣΤΙΓΜΑΣ', 'στιγμασ', 'στιγμασ', 'στιγμασ', ], +[ 'ʀᴀʀᴇ', 'ʀᴀʀᴇ', 'Ʀᴀʀᴇ', 'ƦᴀƦᴇ', 'ʀᴀʀᴇ', 'Ʀᴀʀᴇ', 'ƦᴀƦᴇ', 'ʀᴀʀᴇ', 'ʀᴀʀᴇ', 'ʀᴀʀᴇ', ], +[ 'Ʀᴀʀᴇ', 'ʀᴀʀᴇ', 'Ʀᴀʀᴇ', 'ƦᴀƦᴇ', 'ʀᴀʀᴇ', 'Ʀᴀʀᴇ', 'ƦᴀƦᴇ', 'ʀᴀʀᴇ', 'ʀᴀʀᴇ', 'ʀᴀʀᴇ', ], +[ 'ƦᴀƦᴇ', 'ʀᴀʀᴇ', 'Ʀᴀʀᴇ', 'ƦᴀƦᴇ', 'ʀᴀʀᴇ', 'Ʀᴀʀᴇ', 'ƦᴀƦᴇ', 'ʀᴀʀᴇ', 'ʀᴀʀᴇ', 'ʀᴀʀᴇ', ], +[ 'Ԧԧ', 'ԧԧ', 'Ԧԧ', 'ԦԦ', 'ԧԧ', 'Ԧԧ', 'ԦԦ', 'ԧԧ', 'ԧԧ', 'ԧԧ', ], +[ 'ԧԧ', 'ԧԧ', 'Ԧԧ', 'ԦԦ', 'ԧԧ', 'Ԧԧ', 'ԦԦ', 'ԧԧ', 'ԧԧ', 'ԧԧ', ], +[ 'Ԧԧ', 'ԧԧ', 'Ԧԧ', 'ԦԦ', 'ԧԧ', 'Ԧԧ', 'ԦԦ', 'ԧԧ', 'ԧԧ', 'ԧԧ', ], +[ 'ԦԦ', 'ԧԧ', 'Ԧԧ', 'ԦԦ', 'ԧԧ', 'Ԧԧ', 'ԦԦ', 'ԧԧ', 'ԧԧ', 'ԧԧ', ], +[ "þǽr rihtes", "þǽr rihtes", "Þǽr Rihtes", "ÞǼR RIHTES", "þǽr rihtes", "Þǽr Rihtes", "ÞǼR RIHTES", "þǽr rihtes", "þǽr rihtes", "þǽr rihtes", ], +[ "duȝeðlice", "duȝeðlice", "Duȝeðlice", "DUȜEÐLICE", "duȝeðlice", "Duȝeðlice", "DUȜEÐLICE", "duȝeðlice", "duȝeðlice", "duȝeðlice", ], +[ "Van Dijke", "van dijke", "Van Dijke", "VAN DIJKE", "van dijke", "Van Dijke", "VAN DIJKE", "van dijke", "van dijke", "van dijke", ], +[ "fi", "fi", "fi", "fi", "fi", "Fi", "FI", "fi", "fi", "fi", ], +[ "filesystem", "filesystem", "filesystem", "fiLESYSTEM", "filesystem", "Filesystem", "FILESYSTEM", "filesystem", "filesystem", "filesystem", ], +[ "efficient", "efficient", "Efficient", "EffiCIENT", "efficient", "Efficient", "EFFICIENT", "efficient", "efficient", "efficient", ], +[ "flour and water", "flour and water", "flour And Water", "flOUR AND WATER", "flour and water", "Flour And Water", "FLOUR AND WATER", "flour and water", "flour and water", "flour and water", ], +[ "dz", "dz", "Dz", "DZ", "dz", "Dz", "DZ", "dz", "dz", "dz", ], +[ "dzur mountain", "dzur mountain", "Dzur Mountain", "DZUR MOUNTAIN", "dzur mountain", "Dzur Mountain", "DZUR MOUNTAIN", "dzur mountain", "dzur mountain", "dzur mountain", ], +[ "poſt", "poſt", "Poſt", "POST", "poſt", "Poſt", "POST", "post", "post", "post", ], +[ "poſt", "poſt", "Poſt", "POſt", "poſt", "Poſt", "POST", "poſt", "post", "post", ], +[ "ſtop", "ſtop", "ſtop", "ſtOP", "ſtop", "Stop", "STOP", "ſtop", "stop", "stop", ], +[ "tschüß", "tschüß", "Tschüß", "TSCHÜß", "tschüß", "Tschüß", "TSCHÜSS", "tschüß", "tschüss", "tschüss", ], +[ "TSCHÜẞ", "tschüß", "Tschüß", "TSCHÜẞ", "tschüß", "Tschüß", "TSCHÜẞ", "tschüß", "tschüss", "tschüss", ], +[ "rußland", "rußland", "Rußland", "RUßLAND", "rußland", "Rußland", "RUSSLAND", "rußland", "russland", "russland", ], +[ "RUẞLAND", "rußland", "Rußland", "RUẞLAND", "rußland", "Rußland", "RUẞLAND", "rußland", "russland", "russland", ], +[ "weiß", "weiß", "Weiß", "WEIß", "weiß", "Weiß", "WEISS", "weiß", "weiss", "weiss", ], +[ "WEIẞ", "weiß", "Weiß", "WEIẞ", "weiß", "Weiß", "WEIẞ", "weiß", "weıss", "weiss", ], +[ "ẞIEW", "ßiew", "ẞiew", "ẞIEW", "ßiew", "ẞiew", "ẞIEW", "ßiew", "ssıew", "ssiew", ], +[ "ͅ", "ͅ", "Ι", "Ι", "ͅ", "Ι", "Ι", "ι", "ι", "ι", ], +[ "ᾲ", "ᾲ", "Ὰͅ", "ᾺΙ", "ᾲ", "Ὰͅ", "ᾺΙ", "ὰι", "ὰι", "ὰι", ], +[ "Ὰι", "ὰι", "Ὰι", "ᾺΙ", "ὰι", "Ὰι", "ᾺΙ", "ὰι", "ὰι", "ὰι", ], +[ "ᾺΙ", "ὰι", "Ὰι", "ᾺΙ", "ὰι", "Ὰι", "ᾺΙ", "ὰι", "ὰι", "ὰι", ], +[ "ᾲ", "ᾲ", "ᾲ", "ᾲ", "ᾲ", "Ὰͅ", "ᾺΙ", "ᾲ", "ὰι", "ὰι", ], +[ "Ὰͅ", "ᾲ", "Ὰͅ", "ᾺΙ", "ᾲ", "Ὰͅ", "ᾺΙ", "ὰι", "ὰι", "ὰι", ], +[ "ᾺΙ", "ὰι", "Ὰι", "ᾺΙ", "ὰι", "Ὰι", "ᾺΙ", "ὰι", "ὰι", "ὰι", ], +[ "ᾲ στο διάολο", "ᾲ στο διάολο", "ᾲ Στο Διάολο", "ᾲ ΣΤΟ ΔΙΆΟΛΟ", "ᾲ στο διάολο", "Ὰͅ Στο Διάολο", "ᾺΙ ΣΤΟ ΔΙΆΟΛΟ", "ᾲ στο διάολο", "ὰι στο διάολο", "ὰι στο διάολο", ], +[ "ᾲ στο διάολο", "ᾲ στο διάολο", "Ὰͅ Στο Διάολο", "ᾺΙ ΣΤΟ ΔΙΆΟΛΟ", "ᾲ στο διάολο", "Ὰͅ Στο Διάολο", "ᾺΙ ΣΤΟ ΔΙΆΟΛΟ", "ὰι στο διάολο", "ὰι στο διάολο", "ὰι στο διάολο", ], +[ "ⅷ", "ⅷ", "Ⅷ", "Ⅷ", "ⅷ", "Ⅷ", "Ⅷ", "ⅷ", "ⅷ", "ⅷ", ], +[ "henry ⅷ", "henry ⅷ", "Henry Ⅷ", "HENRY Ⅷ", "henry ⅷ", "Henry Ⅷ", "HENRY Ⅷ", "henry ⅷ", "henry ⅷ", "henry ⅷ", ], +[ "ⓚ", "ⓚ", "Ⓚ", "Ⓚ", "ⓚ", "Ⓚ", "Ⓚ", "ⓚ", "ⓚ", "ⓚ", ], +[ "i work at ⓚ", "i work at ⓚ", "I Work At Ⓚ", "I WORK AT Ⓚ", "i work at ⓚ", "I Work At Ⓚ", "I WORK AT Ⓚ", "i work at ⓚ", "i work at ⓚ", "i work at ⓚ", ], +[ "istambul", "istambul", "Istambul", "ISTAMBUL", "istambul", "Istambul", "ISTAMBUL", "istambul", "istambul", "istambul", ], +[ "i̇stanbul", "i̇stanbul", "İstanbul", "İSTANBUL", "i̇stanbul", "İstanbul", "İSTANBUL", "i̇stanbul", "i̇stanbul", "i̇stanbul", ], +[ "İstanbul", "i̇stanbul", "İstanbul", "İSTANBUL", "i̇stanbul", "İstanbul", "İSTANBUL", "i̇stanbul", "ı̇stanbul", "i̇stanbul", ], +[ "İSTANBUL", "istanbul", "İstanbul", "İSTANBUL", "i̇stanbul", "İstanbul", "İSTANBUL", "İstanbul", "istanbul", "i̇stanbul", ], +[ "στιγμας", "στιγμας", "Στιγμας", "ΣΤΙΓΜΑΣ", "στιγμας", "Στιγμας", "ΣΤΙΓΜΑΣ", "στιγμασ", "στιγμασ", "στιγμασ", ], +[ "στιγμασ", "στιγμασ", "Στιγμασ", "ΣΤΙΓΜΑΣ", "στιγμασ", "Στιγμασ", "ΣΤΙΓΜΑΣ", "στιγμασ", "στιγμασ", "στιγμασ", ], +[ "ΣΤΙΓΜΑΣ", "στιγμασ", "Στιγμασ", "ΣΤΙΓΜΑΣ", "στιγμασ", "Στιγμασ", "ΣΤΙΓΜΑΣ", "στιγμασ", "στιγμασ", "στιγμασ", ], +[ "ʀᴀʀᴇ", "ʀᴀʀᴇ", "Ʀᴀʀᴇ", "ƦᴀƦᴇ", "ʀᴀʀᴇ", "Ʀᴀʀᴇ", "ƦᴀƦᴇ", "ʀᴀʀᴇ", "ʀᴀʀᴇ", "ʀᴀʀᴇ", ], +[ "𐐼𐐯𐑅𐐨𐑉𐐯𐐻", "𐐼𐐯𐑅𐐨𐑉𐐯𐐻", "𐐔𐐯𐑅𐐨𐑉𐐯𐐻", "𐐔𐐇𐐝𐐀𐐡𐐇𐐓", "𐐼𐐯𐑅𐐨𐑉𐐯𐐻", "𐐔𐐯𐑅𐐨𐑉𐐯𐐻", "𐐔𐐇𐐝𐐀𐐡𐐇𐐓", "𐐼𐐯𐑅𐐨𐑉𐐯𐐻", "𐐼𐐯𐑅𐐨𐑉𐐯𐐻", "𐐼𐐯𐑅𐐨𐑉𐐯𐐻", ], +[ "Ԧԧ", "ԧԧ", "Ԧԧ", "ԦԦ", "ԧԧ", "Ԧԧ", "ԦԦ", "ԧԧ", "ԧԧ", "ԧԧ", ], +[ "ﬓﬔﬕﬖﬗ", "ﬓﬔﬕﬖﬗ", "ﬓﬔﬕﬖﬗ", "ﬓﬔﬕﬖﬗ", "ﬓﬔﬕﬖﬗ", "Մնﬔﬕﬖﬗ", "ՄՆՄԵՄԻՎՆՄԽ", "ﬓﬔﬕﬖﬗ", "մնմեմիվնմխ", "մնմեմիվնմխ", ], +[ "ʼn groot", "ʼn groot", "ʼn Groot", "ʼn GROOT", "ʼn groot", "ʼN Groot", "ʼN GROOT", "ʼn groot", "ʼn groot", "ʼn groot", ], +[ "ẚ", "ẚ", "ẚ", "ẚ", "ẚ", "Aʾ", "Aʾ", "ẚ", "aʾ", "aʾ", ], +[ "ff", "ff", "ff", "ff", "ff", "Ff", "FF", "ff", "ff", "ff", ], +[ "ǰ", "ǰ", "ǰ", "ǰ", "ǰ", "J̌", "J̌", "ǰ", "ǰ", "ǰ", ], +[ "550 nm or Å", "550 nm or å", "550 Nm Or Å", "550 NM OR Å", "550 nm or å", "550 Nm Or Å", "550 NM OR Å", "550 nm or å", "550 nm or å", "550 nm or å", ], +); + + use feature qw(fc); + + for (@test_table) { + my ($simple_lc, $simple_tc, $simple_uc, $simple_fc) = @{$_}[1, 2, 3, 7]; + my ($orig, $lower, $titlecase, $upper, $fc_turkic, $fc_full) = @{$_}[0,4,5,6,8,9]; + + is( fc($orig), $fc_full, 'fc works' ); + is( "\F$orig", $fc_full, '\F works' ); + is( lc($orig), $lower, 'lc works' ); + is( "\L$orig", $lower, '\L works' ); + is( uc($orig), $upper, 'uc works' ); + is( "\U$orig", $upper, '\U works' ); + } +} + +{ + use feature qw(fc); + package Eeyup { use overload q{""} => sub { "\x{df}" }, fallback => 1 } + package Uunope { use overload q{""} => sub { "\x{30cb}" }, fallback => 1 } + package Undef { use overload q{""} => sub { undef }, fallback => 1 } + + my $obj = bless {}, "Eeyup"; + is(fc($obj), "ss", "fc() works on overloaded objects returning latin-1"); + $obj = bless {}, "Eeyup"; + is("\F$obj", "ss", '\F works on overloaded objects returning latin-1'); + + $obj = bless {}, "Uunope"; + is(fc($obj), "\x{30cb}", "fc() works on overloaded objects returning UTF-8"); + $obj = bless {}, "Uunope"; + is("\F$obj", "\x{30cb}", '\F works on overloaded objects returning UTF-8'); + + $obj = bless {}, "Undef"; + my $warnings; + { + no warnings; + use warnings "uninitialized"; + local $SIG{__WARN__} = sub { $warnings++; like(shift, qr/Use of uninitialized value (?:\$obj )?in fc/) }; + fc(undef); + fc($obj); + } + is( $warnings, 2, "correct number of warnings" ); + + my $fetched = 0; + package Derpy { sub TIESCALAR { bless {}, shift } sub FETCH { $fetched++; "\x{df}" } } + + tie my $x, "Derpy"; + + is( fc($x), "ss", "fc() works on tied values" ); + is( $fetched, 1, "and only calls the magic once" ); + +} + +{ + use feature qw( fc ); + my $troublesome1 = "\xdf" x 11; #SvLEN should be 12, SvCUR should be 11 + #So this should force fc() to grow the string. + + is( fc($troublesome1), "ss" x 11, "fc() grows the string" ); + + my $troublesome2 = "abcdef:\x{df}:fjksjs"; #SvLEN should be 16, SvCUR should be 15 + is( fc($troublesome2), "abcdef:ss:fjksjs", "fc() expands \\x{DF} in the middle of a string that needs to grow" ); + + my $troublesome3 = ":\x{df}:"; + is( fc($troublesome3), ":ss:", "fc() expands \\x{DF} in the middle of a string" ); + + + my $troublesome4 = "\x{B5}"; #\N{MICRON SIGN} is latin-1, but its foldcase is in UTF-8 + + is( fc($troublesome4), "\x{3BC}", "fc() for a latin-1 \x{B5} returns UTF-8" ); + ok( !utf8::is_utf8($troublesome4), "fc() doesn't upgrade the original string" ); + + + my $troublesome5 = "\x{C9}abda\x{B5}aaf\x{C8}"; # Up until foldcasing \x{B5}, the string + # was in Latin-1. This tests that the + # results don't have illegal UTF-8 + # (i.e. leftover latin-1) in them + + is( fc($troublesome5), "\x{E9}abda\x{3BC}aaf\x{E8}" ); +} + +{ + use feature qw( fc unicode_strings ); + + # This tests both code paths in pp_fc + + for (0..0xff) { + my $latin1 = chr; + my $utf8 = $latin1; + utf8::downgrade($latin1); #No-op, but doesn't hurt + utf8::upgrade($utf8); + is(fc($latin1), fc($utf8), "fc() gives the same results for \\x{$_} in Latin-1 and UTF-8 under unicode_strings"); + { + use locale; + is(fc($latin1), lc($latin1), "use locale; fc(qq{\\x{$_}}), lc(qq{\\x{$_}}) when qq{\\x{$_}} is in latin-1"); + is(fc($utf8), lc($utf8), "use locale; fc(qq{\\x{$_}}), lc(qq{\\x{$_}}) when qq{\\x{$_}} is in latin-1"); + } + { + no feature 'unicode_strings'; + is(fc($latin1), lc($latin1), "under nothing, fc() for <256 is the same as lc"); + } + } +} + my $num_tests = curr_test() - 1; die qq[$0: failed to find casefoldings from "$CF"\n] unless $num_tests > 0; @@ -2922,7 +2922,7 @@ S_scan_const(pTHX_ char *start) } /* string-change backslash escapes */ - if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) { + if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQF", *s)) { --s; break; } @@ -4489,7 +4489,8 @@ Perl_yylex(pTHX) PL_lex_casestack[PL_lex_casemods] = '\0'; if (PL_bufptr != PL_bufend - && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q')) { + && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q' + || oldmod == 'F')) { PL_bufptr += 2; PL_lex_state = LEX_INTERPCONCAT; #ifdef PERL_MAD @@ -4539,8 +4540,10 @@ Perl_yylex(pTHX) if (!PL_madskills) /* when just compiling don't need correct */ if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3)) tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */ - if ((*s == 'L' || *s == 'U') && - (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U'))) { + if ((*s == 'L' || *s == 'U' || *s == 'F') && + (strchr(PL_lex_casestack, 'L') + || strchr(PL_lex_casestack, 'U') + || strchr(PL_lex_casestack, 'F'))) { PL_lex_casestack[--PL_lex_casemods] = '\0'; PL_lex_allbrackets--; return REPORT(')'); @@ -4564,6 +4567,8 @@ Perl_yylex(pTHX) NEXTVAL_NEXTTOKE.ival = OP_UC; else if (*s == 'Q') NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA; + else if (*s == 'F') + NEXTVAL_NEXTTOKE.ival = OP_FC; else Perl_croak(aTHX_ "panic: yylex, *s=%u", *s); if (PL_madskills) { @@ -7414,6 +7419,9 @@ Perl_yylex(pTHX) case KEY_fork: FUN0(OP_FORK); + case KEY_fc: + UNI(OP_FC); + case KEY_fcntl: LOP(OP_FCNTL,XTERM); |