diff options
author | Karl Williamson <public@khwilliamson.com> | 2012-12-12 20:26:08 -0700 |
---|---|---|
committer | Karl Williamson <public@khwilliamson.com> | 2012-12-22 11:11:28 -0700 |
commit | 86f72d56e71cb202c98802b68923dfca4797c3e2 (patch) | |
tree | 10d09c56be441c5ebb8b9ef7515cc7a5c330347a | |
parent | 9c0b68885345dfc017948c78b79c222b1468d96c (diff) | |
download | perl-86f72d56e71cb202c98802b68923dfca4797c3e2.tar.gz |
Use array for some inversion lists
This patch creates an array of pointers to the inversion lists that cover
the Latin-1 ranges of the POSIX character classes, and uses it in place of
the individual per-class variables (PL_L1PosixAlnum, etc.) used previously.
-rw-r--r-- | embedvar.h | 9 |
-rw-r--r-- | handy.h | 1 |
-rw-r--r-- | intrpvar.h | 9 |
-rw-r--r-- | perl.c | 4 |
-rw-r--r-- | regcomp.c | 51 |
-rw-r--r-- | sv.c | 11 |
6 files changed, 36 insertions, 49 deletions
diff --git a/embedvar.h b/embedvar.h index 270ce19565..09e081030b 100644 --- a/embedvar.h +++ b/embedvar.h @@ -55,14 +55,7 @@ #define PL_Env (vTHX->IEnv) #define PL_HasMultiCharFold (vTHX->IHasMultiCharFold) #define PL_L1Cased (vTHX->IL1Cased) -#define PL_L1PosixAlnum (vTHX->IL1PosixAlnum) -#define PL_L1PosixAlpha (vTHX->IL1PosixAlpha) -#define PL_L1PosixGraph (vTHX->IL1PosixGraph) -#define PL_L1PosixLower (vTHX->IL1PosixLower) -#define PL_L1PosixPrint (vTHX->IL1PosixPrint) -#define PL_L1PosixPunct (vTHX->IL1PosixPunct) -#define PL_L1PosixUpper (vTHX->IL1PosixUpper) -#define PL_L1PosixWord (vTHX->IL1PosixWord) +#define PL_L1Posix_ptrs (vTHX->IL1Posix_ptrs) #define PL_LIO (vTHX->ILIO) #define PL_Latin1 (vTHX->ILatin1) #define PL_Mem (vTHX->IMem) @@ -801,6 +801,7 @@ typedef enum { #endif #define POSIX_SWASH_COUNT _FIRST_NON_SWASH_CC +#define POSIX_CC_COUNT (_HIGHEST_REGCOMP_DOT_H_SYNC + 1) #if defined(PERL_IN_UTF8_C) || defined(PERL_IN_REGCOMP_C) # if _CC_WORDCHAR != 0 || _CC_DIGIT != 1 || _CC_ALPHA != 2 || _CC_LOWER != 3 \ diff --git a/intrpvar.h b/intrpvar.h index a50ad51d4a..5fd84b4de1 100644 --- a/intrpvar.h +++ b/intrpvar.h @@ -569,10 +569,8 @@ PERLVAR(I, AboveLatin1, SV *) PERLVAR(I, PerlSpace, SV *) PERLVAR(I, XPerlSpace, SV *) -PERLVAR(I, L1PosixAlnum,SV *) PERLVAR(I, PosixAlnum, SV *) -PERLVAR(I, L1PosixAlpha,SV *) PERLVAR(I, PosixAlpha, SV *) PERLVAR(I, PosixBlank, SV *) @@ -585,25 +583,19 @@ PERLVAR(I, XPosixCntrl, SV *) PERLVAR(I, PosixDigit, SV *) -PERLVAR(I, L1PosixGraph,SV *) PERLVAR(I, PosixGraph, SV *) -PERLVAR(I, L1PosixLower,SV *) PERLVAR(I, PosixLower, SV *) -PERLVAR(I, L1PosixPrint,SV *) PERLVAR(I, PosixPrint, SV *) -PERLVAR(I, L1PosixPunct,SV *) PERLVAR(I, PosixPunct, SV *) PERLVAR(I, PosixSpace, SV *) PERLVAR(I, XPosixSpace, SV *) -PERLVAR(I, L1PosixUpper,SV *) PERLVAR(I, PosixUpper, SV *) -PERLVAR(I, L1PosixWord, SV *) PERLVAR(I, PosixWord, SV *) PERLVAR(I, PosixXDigit, SV *) @@ -625,6 +617,7 @@ PERLVAR(I, utf8_tofold, SV *) PERLVAR(I, 
utf8_charname_begin, SV *) PERLVAR(I, utf8_charname_continue, SV *) PERLVARA(I, utf8_swash_ptrs, POSIX_SWASH_COUNT, SV *) +PERLVARA(I, L1Posix_ptrs, POSIX_CC_COUNT, SV *) PERLVAR(I, last_swash_hv, HV *) PERLVAR(I, last_swash_tmps, U8 *) PERLVAR(I, last_swash_slen, STRLEN) @@ -1002,6 +1002,10 @@ perl_destruct(pTHXx) PL_utf8_idstart = NULL; PL_utf8_idcont = NULL; PL_utf8_foldclosures = NULL; + for (i = 0; i < POSIX_CC_COUNT; i++) { + SvREFCNT_dec(PL_L1Posix_ptrs[i]); + PL_L1Posix_ptrs[i] = NULL; + } if (!specialWARN(PL_compiling.cop_warnings)) PerlMemShared_free(PL_compiling.cop_warnings); @@ -5334,15 +5334,16 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count, PL_ASCII = _new_invlist_C_array(ASCII_invlist); PL_Latin1 = _new_invlist_C_array(Latin1_invlist); - PL_L1PosixAlnum = _new_invlist_C_array(L1PosixAlnum_invlist); + PL_L1Posix_ptrs[_CC_ALPHANUMERIC] + = _new_invlist_C_array(L1PosixAlnum_invlist); PL_PosixAlnum = _new_invlist_C_array(PosixAlnum_invlist); - PL_L1PosixAlpha = _new_invlist_C_array(L1PosixAlpha_invlist); + PL_L1Posix_ptrs[_CC_ALPHA] + = _new_invlist_C_array(L1PosixAlpha_invlist); PL_PosixAlpha = _new_invlist_C_array(PosixAlpha_invlist); PL_PosixBlank = _new_invlist_C_array(PosixBlank_invlist); PL_XPosixBlank = _new_invlist_C_array(XPosixBlank_invlist); - PL_L1Cased = _new_invlist_C_array(L1Cased_invlist); PL_PosixCntrl = _new_invlist_C_array(PosixCntrl_invlist); @@ -5350,31 +5351,31 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count, PL_PosixDigit = _new_invlist_C_array(PosixDigit_invlist); - PL_L1PosixGraph = _new_invlist_C_array(L1PosixGraph_invlist); + PL_L1Posix_ptrs[_CC_GRAPH] = _new_invlist_C_array(L1PosixGraph_invlist); PL_PosixGraph = _new_invlist_C_array(PosixGraph_invlist); - PL_L1PosixLower = _new_invlist_C_array(L1PosixLower_invlist); + PL_L1Posix_ptrs[_CC_LOWER] = _new_invlist_C_array(L1PosixLower_invlist); PL_PosixLower = _new_invlist_C_array(PosixLower_invlist); - PL_L1PosixPrint = 
_new_invlist_C_array(L1PosixPrint_invlist); + PL_L1Posix_ptrs[_CC_PRINT] = _new_invlist_C_array(L1PosixPrint_invlist); PL_PosixPrint = _new_invlist_C_array(PosixPrint_invlist); - PL_L1PosixPunct = _new_invlist_C_array(L1PosixPunct_invlist); + PL_L1Posix_ptrs[_CC_PUNCT] = _new_invlist_C_array(L1PosixPunct_invlist); PL_PosixPunct = _new_invlist_C_array(PosixPunct_invlist); PL_PerlSpace = _new_invlist_C_array(PerlSpace_invlist); PL_XPerlSpace = _new_invlist_C_array(XPerlSpace_invlist); - PL_PosixSpace = _new_invlist_C_array(PosixSpace_invlist); PL_XPosixSpace = _new_invlist_C_array(XPosixSpace_invlist); - PL_L1PosixUpper = _new_invlist_C_array(L1PosixUpper_invlist); + PL_L1Posix_ptrs[_CC_UPPER] = _new_invlist_C_array(L1PosixUpper_invlist); PL_PosixUpper = _new_invlist_C_array(PosixUpper_invlist); PL_VertSpace = _new_invlist_C_array(VertSpace_invlist); PL_PosixWord = _new_invlist_C_array(PosixWord_invlist); - PL_L1PosixWord = _new_invlist_C_array(L1PosixWord_invlist); + PL_L1Posix_ptrs[_CC_WORDCHAR] + = _new_invlist_C_array(L1PosixWord_invlist); PL_PosixXDigit = _new_invlist_C_array(PosixXDigit_invlist); PL_XPosixXDigit = _new_invlist_C_array(XPosixXDigit_invlist); @@ -11925,20 +11926,20 @@ parseit: case ANYOF_ALPHANUMERIC: /* C's alnum, in contrast to \w */ DO_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, posixes, - PL_PosixAlnum, PL_utf8_swash_ptrs[_CC_ALPHANUMERIC], PL_L1PosixAlnum, swash_property_names[_CC_ALPHANUMERIC], listsv); + PL_PosixAlnum, PL_utf8_swash_ptrs[_CC_ALPHANUMERIC], PL_L1Posix_ptrs[_CC_ALPHANUMERIC], swash_property_names[_CC_ALPHANUMERIC], listsv); break; case ANYOF_NALPHANUMERIC: DO_N_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, posixes, - PL_PosixAlnum, PL_L1PosixAlnum, swash_property_names[_CC_ALPHANUMERIC], listsv, + PL_PosixAlnum, PL_L1Posix_ptrs[_CC_ALPHANUMERIC], swash_property_names[_CC_ALPHANUMERIC], listsv, runtime_posix_matches_above_Unicode); break; case ANYOF_ALPHA: DO_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, posixes, - PL_PosixAlpha, 
PL_utf8_swash_ptrs[_CC_ALPHA], PL_L1PosixAlpha, swash_property_names[_CC_ALPHA], listsv); + PL_PosixAlpha, PL_utf8_swash_ptrs[_CC_ALPHA], PL_L1Posix_ptrs[_CC_ALPHA], swash_property_names[_CC_ALPHA], listsv); break; case ANYOF_NALPHA: DO_N_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, posixes, - PL_PosixAlpha, PL_L1PosixAlpha, swash_property_names[_CC_ALPHA], listsv, + PL_PosixAlpha, PL_L1Posix_ptrs[_CC_ALPHA], swash_property_names[_CC_ALPHA], listsv, runtime_posix_matches_above_Unicode); break; case ANYOF_ASCII: @@ -12045,11 +12046,11 @@ parseit: break; case ANYOF_GRAPH: DO_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, posixes, - PL_PosixGraph, PL_utf8_swash_ptrs[_CC_GRAPH], PL_L1PosixGraph, swash_property_names[_CC_GRAPH], listsv); + PL_PosixGraph, PL_utf8_swash_ptrs[_CC_GRAPH], PL_L1Posix_ptrs[_CC_GRAPH], swash_property_names[_CC_GRAPH], listsv); break; case ANYOF_NGRAPH: DO_N_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, posixes, - PL_PosixGraph, PL_L1PosixGraph, swash_property_names[_CC_GRAPH], listsv, + PL_PosixGraph, PL_L1Posix_ptrs[_CC_GRAPH], swash_property_names[_CC_GRAPH], listsv, runtime_posix_matches_above_Unicode); break; case ANYOF_HORIZWS: @@ -12083,7 +12084,7 @@ parseit: } else { ascii_source = PL_PosixLower; - l1_source = PL_L1PosixLower; + l1_source = PL_L1Posix_ptrs[_CC_LOWER]; Xname = swash_property_names[_CC_LOWER]; swash = PL_utf8_swash_ptrs[_CC_LOWER]; } @@ -12100,20 +12101,20 @@ parseit: } case ANYOF_PRINT: DO_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, posixes, - PL_PosixPrint, PL_utf8_swash_ptrs[_CC_PRINT], PL_L1PosixPrint, swash_property_names[_CC_PRINT], listsv); + PL_PosixPrint, PL_utf8_swash_ptrs[_CC_PRINT], PL_L1Posix_ptrs[_CC_PRINT], swash_property_names[_CC_PRINT], listsv); break; case ANYOF_NPRINT: DO_N_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, posixes, - PL_PosixPrint, PL_L1PosixPrint, swash_property_names[_CC_PRINT], listsv, + PL_PosixPrint, PL_L1Posix_ptrs[_CC_PRINT], swash_property_names[_CC_PRINT], listsv, runtime_posix_matches_above_Unicode); 
break; case ANYOF_PUNCT: DO_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, posixes, - PL_PosixPunct, PL_utf8_swash_ptrs[_CC_PUNCT], PL_L1PosixPunct, swash_property_names[_CC_PUNCT], listsv); + PL_PosixPunct, PL_utf8_swash_ptrs[_CC_PUNCT], PL_L1Posix_ptrs[_CC_PUNCT], swash_property_names[_CC_PUNCT], listsv); break; case ANYOF_NPUNCT: DO_N_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, posixes, - PL_PosixPunct, PL_L1PosixPunct, swash_property_names[_CC_PUNCT], listsv, + PL_PosixPunct, PL_L1Posix_ptrs[_CC_PUNCT], swash_property_names[_CC_PUNCT], listsv, runtime_posix_matches_above_Unicode); break; case ANYOF_PSXSPC: @@ -12148,7 +12149,7 @@ parseit: } else { ascii_source = PL_PosixUpper; - l1_source = PL_L1PosixUpper; + l1_source = PL_L1Posix_ptrs[_CC_UPPER]; Xname = swash_property_names[_CC_UPPER]; swash = PL_utf8_swash_ptrs[_CC_UPPER]; } @@ -12165,11 +12166,11 @@ parseit: } case ANYOF_WORDCHAR: DO_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, posixes, - PL_PosixWord, PL_utf8_swash_ptrs[_CC_WORDCHAR], PL_L1PosixWord, swash_property_names[_CC_WORDCHAR], listsv); + PL_PosixWord, PL_utf8_swash_ptrs[_CC_WORDCHAR], PL_L1Posix_ptrs[_CC_WORDCHAR], swash_property_names[_CC_WORDCHAR], listsv); break; case ANYOF_NWORDCHAR: DO_N_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, posixes, - PL_PosixWord, PL_L1PosixWord, swash_property_names[_CC_WORDCHAR], listsv, + PL_PosixWord, PL_L1Posix_ptrs[_CC_WORDCHAR], swash_property_names[_CC_WORDCHAR], listsv, runtime_posix_matches_above_Unicode); break; case ANYOF_VERTWS: @@ -12652,7 +12653,7 @@ parseit: * indicators, which are weeded out below using the * IS_IN_SOME_FOLD_L1() macro */ if (invlist_highest(cp_list) < 256) { - _invlist_intersection(PL_L1PosixAlpha, cp_list, &fold_intersection); + _invlist_intersection(PL_L1Posix_ptrs[_CC_ALPHA], cp_list, &fold_intersection); } else { @@ -13591,10 +13591,8 @@ perl_clone_using(PerlInterpreter *proto_perl, UV flags, PL_PerlSpace = sv_dup_inc(proto_perl->IPerlSpace, param); PL_XPerlSpace = 
sv_dup_inc(proto_perl->IXPerlSpace, param); - PL_L1PosixAlnum = sv_dup_inc(proto_perl->IL1PosixAlnum, param); PL_PosixAlnum = sv_dup_inc(proto_perl->IPosixAlnum, param); - PL_L1PosixAlpha = sv_dup_inc(proto_perl->IL1PosixAlpha, param); PL_PosixAlpha = sv_dup_inc(proto_perl->IPosixAlpha, param); PL_PosixBlank = sv_dup_inc(proto_perl->IPosixBlank, param); @@ -13607,25 +13605,19 @@ perl_clone_using(PerlInterpreter *proto_perl, UV flags, PL_PosixDigit = sv_dup_inc(proto_perl->IPosixDigit, param); - PL_L1PosixGraph = sv_dup_inc(proto_perl->IL1PosixGraph, param); PL_PosixGraph = sv_dup_inc(proto_perl->IPosixGraph, param); - PL_L1PosixLower = sv_dup_inc(proto_perl->IL1PosixLower, param); PL_PosixLower = sv_dup_inc(proto_perl->IPosixLower, param); - PL_L1PosixPrint = sv_dup_inc(proto_perl->IL1PosixPrint, param); PL_PosixPrint = sv_dup_inc(proto_perl->IPosixPrint, param); - PL_L1PosixPunct = sv_dup_inc(proto_perl->IL1PosixPunct, param); PL_PosixPunct = sv_dup_inc(proto_perl->IPosixPunct, param); PL_PosixSpace = sv_dup_inc(proto_perl->IPosixSpace, param); PL_XPosixSpace = sv_dup_inc(proto_perl->IXPosixSpace, param); - PL_L1PosixUpper = sv_dup_inc(proto_perl->IL1PosixUpper, param); PL_PosixUpper = sv_dup_inc(proto_perl->IPosixUpper, param); - PL_L1PosixWord = sv_dup_inc(proto_perl->IL1PosixWord, param); PL_PosixWord = sv_dup_inc(proto_perl->IPosixWord, param); PL_PosixXDigit = sv_dup_inc(proto_perl->IPosixXDigit, param); @@ -13640,6 +13632,9 @@ perl_clone_using(PerlInterpreter *proto_perl, UV flags, for (i = 0; i < POSIX_SWASH_COUNT; i++) { PL_utf8_swash_ptrs[i] = sv_dup_inc(proto_perl->Iutf8_swash_ptrs[i], param); } + for (i = 0; i < POSIX_CC_COUNT; i++) { + PL_L1Posix_ptrs[i] = sv_dup_inc(proto_perl->IL1Posix_ptrs[i], param); + } PL_utf8_mark = sv_dup_inc(proto_perl->Iutf8_mark, param); PL_utf8_X_regular_begin = sv_dup_inc(proto_perl->Iutf8_X_regular_begin, param); PL_utf8_X_extend = sv_dup_inc(proto_perl->Iutf8_X_extend, param); |