diff options
author | Karl Williamson <public@khwilliamson.com> | 2010-12-26 10:31:16 -0700 |
---|---|---|
committer | Karl Williamson <public@khwilliamson.com> | 2011-01-16 16:36:43 -0700 |
commit | a62b1201c068dc7b099bcb7182e188c4d2fbf34c (patch) | |
tree | 6c067a6e4adc8f2333b749fa3592c2812e711b95 /regexp.h | |
parent | 5458d9a05ef8545ccbb8a58e670fbede60d10480 (diff) | |
download | perl-a62b1201c068dc7b099bcb7182e188c4d2fbf34c.tar.gz |
Use multi-bit field for regex character set
The /d, /l, and /u regex modifiers are mutually exclusive. This patch
changes the field that stores the character set into a multi-bit field,
with an enum value determining which character set is in effect. This
data structure more closely reflects the modifiers' mutual exclusivity,
conserves bits, and is easier to extend.
A small API is added to set and query the bit field.
This patch is not .xs source backwards compatible. A handful of cpan
programs are affected.
Diffstat (limited to 'regexp.h')
-rw-r--r-- | regexp.h | 28 |
1 files changed, 27 insertions, 1 deletions
@@ -235,7 +235,7 @@ and check for NULL.
 
 /* Note, includes locale, unicode */
 #define STD_PMMOD_FLAGS_CLEAR(pmfl) \
-    *(pmfl) &= ~(RXf_PMf_FOLD|RXf_PMf_MULTILINE|RXf_PMf_SINGLELINE|RXf_PMf_EXTENDED|RXf_PMf_LOCALE|RXf_PMf_UNICODE)
+    *(pmfl) &= ~(RXf_PMf_FOLD|RXf_PMf_MULTILINE|RXf_PMf_SINGLELINE|RXf_PMf_EXTENDED|RXf_PMf_CHARSET)
 
 /* chars and strings used as regex pattern modifiers
  * Singular is a 'c'har, plural is a "string"
@@ -293,6 +293,32 @@ and check for NULL.
  * unshared area without affecting binary compatibility */
 #define RXf_BASE_SHIFT (_RXf_PMf_SHIFT_NEXT+2)
 
+/* embed.pl doesn't yet know how to handle static inline functions, so
+   manually decorate them here with gcc-style attributes.
+*/
+PERL_STATIC_INLINE const char *
+get_regex_charset_name(const U32 flags, STRLEN* const lenp)
+    __attribute__warn_unused_result__;
+
+#define MAX_CHARSET_NAME_LENGTH 1
+
+PERL_STATIC_INLINE const char *
+get_regex_charset_name(const U32 flags, STRLEN* const lenp)
+{
+    /* Returns a string that corresponds to the name of the regex character set
+     * given by 'flags', and *lenp is set the length of that string, which
+     * cannot exceed MAX_CHARSET_NAME_LENGTH characters */
+
+    *lenp = 1;
+    switch (get_regex_charset(flags)) {
+        case REGEX_DEPENDS_CHARSET: return DUAL_PAT_MODS;
+        case REGEX_LOCALE_CHARSET: return LOCALE_PAT_MODS;
+        case REGEX_UNICODE_CHARSET: return UNICODE_PAT_MODS;
+    }
+
+    return "?"; /* Unknown */
+}
+
 /* Anchor and GPOS related stuff */
 #define RXf_ANCH_BOL (1<<(RXf_BASE_SHIFT+0))
 #define RXf_ANCH_MBOL (1<<(RXf_BASE_SHIFT+1))