summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Karl Williamson <public@khwilliamson.com>, 2011-02-02 17:48:56 -0700
committer: Karl Williamson <public@khwilliamson.com>, 2011-02-14 08:41:38 -0700
commit: df7a846035e6969f5e1c298976888d5cc7878d25 (patch)
tree: 166edd6f5ecc35bcf1a79324fc96df717715fd8b
parent: 5b67c30a31044c5d4e88d3f815fdfdf86649016b (diff)
download: perl-df7a846035e6969f5e1c298976888d5cc7878d25.tar.gz
Initial setup to accommodate /aa regex modifier
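This widens the regex charset flag field from 2 bits to 3 so that a new charset type can be added for /aa, and does the related bookkeeping (shift constants, charset name table, extflags names).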
-rw-r--r-- op.h 2
-rw-r--r-- op_reg_common.h 7
-rw-r--r-- regexp.h 7
-rw-r--r-- regnodes.h 8
4 files changed, 14 insertions, 10 deletions
diff --git a/op.h b/op.h
index 43be1f7127..35aa48d54c 100644
--- a/op.h
+++ b/op.h
@@ -367,7 +367,7 @@ struct pmop {
/* Leave some space, so future bit allocations can go either in the shared or
* unshared area without affecting binary compatibility */
-#define PMf_BASE_SHIFT (_RXf_PMf_SHIFT_NEXT+7)
+#define PMf_BASE_SHIFT (_RXf_PMf_SHIFT_NEXT+6)
/* taint $1 etc. if target tainted */
#define PMf_RETAINT (1<<(PMf_BASE_SHIFT+0))
diff --git a/op_reg_common.h b/op_reg_common.h
index f5096500a3..4c3fe2c42c 100644
--- a/op_reg_common.h
+++ b/op_reg_common.h
@@ -41,11 +41,12 @@ typedef enum {
REGEX_DEPENDS_CHARSET = 0,
REGEX_LOCALE_CHARSET,
REGEX_UNICODE_CHARSET,
- REGEX_ASCII_RESTRICTED_CHARSET
+ REGEX_ASCII_RESTRICTED_CHARSET,
+ REGEX_ASCII_MORE_RESTRICTED_CHARSET
} regex_charset;
#define _RXf_PMf_CHARSET_SHIFT ((RXf_PMf_STD_PMMOD_SHIFT)+5)
-#define RXf_PMf_CHARSET (3 << (_RXf_PMf_CHARSET_SHIFT)) /* 2 bits */
+#define RXf_PMf_CHARSET (7 << (_RXf_PMf_CHARSET_SHIFT)) /* 3 bits */
/* embed.pl doesn't yet know how to handle static inline functions, so
manually decorate them here with gcc-style attributes.
@@ -78,7 +79,7 @@ get_regex_charset(const U32 flags)
/* Next available bit after the above. Name begins with '_' so won't be
* exported by B */
-#define _RXf_PMf_SHIFT_NEXT (RXf_PMf_STD_PMMOD_SHIFT+7)
+#define _RXf_PMf_SHIFT_NEXT (RXf_PMf_STD_PMMOD_SHIFT+8)
/* Mask of the above bits. These need to be transferred from op_pmflags to
* re->extflags during compilation */
diff --git a/regexp.h b/regexp.h
index 9780297486..31b5be009b 100644
--- a/regexp.h
+++ b/regexp.h
@@ -269,6 +269,7 @@ and check for NULL.
#define UNICODE_PAT_MODS "u"
#define DEPENDS_PAT_MODS "d"
#define ASCII_RESTRICT_PAT_MODS "a"
+#define ASCII_MORE_RESTRICT_PAT_MODS "aa"
/* This string is expected by regcomp.c to be ordered so that the first
* character is the flag in bit RXf_PMf_STD_PMMOD_SHIFT of extflags; the next
@@ -293,7 +294,7 @@ and check for NULL.
/* Leave some space, so future bit allocations can go either in the shared or
* unshared area without affecting binary compatibility */
-#define RXf_BASE_SHIFT (_RXf_PMf_SHIFT_NEXT+2)
+#define RXf_BASE_SHIFT (_RXf_PMf_SHIFT_NEXT+1)
/* embed.pl doesn't yet know how to handle static inline functions, so
manually decorate them here with gcc-style attributes.
@@ -302,7 +303,7 @@ PERL_STATIC_INLINE const char *
get_regex_charset_name(const U32 flags, STRLEN* const lenp)
__attribute__warn_unused_result__;
-#define MAX_CHARSET_NAME_LENGTH 1
+#define MAX_CHARSET_NAME_LENGTH 2
PERL_STATIC_INLINE const char *
get_regex_charset_name(const U32 flags, STRLEN* const lenp)
@@ -317,6 +318,8 @@ get_regex_charset_name(const U32 flags, STRLEN* const lenp)
case REGEX_LOCALE_CHARSET: return LOCALE_PAT_MODS;
case REGEX_UNICODE_CHARSET: return UNICODE_PAT_MODS;
case REGEX_ASCII_RESTRICTED_CHARSET: return ASCII_RESTRICT_PAT_MODS;
+ case REGEX_ASCII_MORE_RESTRICTED_CHARSET:
+ return ASCII_MORE_RESTRICT_PAT_MODS;
}
return "?"; /* Unknown */
diff --git a/regnodes.h b/regnodes.h
index 72c93ffb00..b1cc6b3624 100644
--- a/regnodes.h
+++ b/regnodes.h
@@ -715,15 +715,15 @@ EXTCONST char * const PL_reg_name[] = {
EXTCONST char * PL_reg_extflags_name[];
#else
EXTCONST char * const PL_reg_extflags_name[] = {
- /* Bits in extflags defined: 11111111111111111111111001111111 */
+ /* Bits in extflags defined: 11111111111111111111111011111111 */
"MULTILINE", /* 0x00000001 */
"SINGLELINE", /* 0x00000002 */
"FOLD", /* 0x00000004 */
"EXTENDED", /* 0x00000008 */
"KEEPCOPY", /* 0x00000010 */
- "CHARSET", /* 0x00000060 */
- "CHARSET", /* 0x00000060 */
- "UNUSED_BIT_7", /* 0x00000080 */
+ "CHARSET", /* 0x000000e0 */
+ "CHARSET", /* 0x000000e0 */
+ "CHARSET", /* 0x000000e0 */
"UNUSED_BIT_8", /* 0x00000100 */
"ANCH_BOL", /* 0x00000200 */
"ANCH_MBOL", /* 0x00000400 */