summary | refs | log | tree | commit | diff
path: root/op_reg_common.h
diff options
context:
space:
mode:
author: Karl Williamson <public@khwilliamson.com> 2010-12-26 10:31:16 -0700
committer: Karl Williamson <public@khwilliamson.com> 2011-01-16 16:36:43 -0700
commit: a62b1201c068dc7b099bcb7182e188c4d2fbf34c (patch)
tree: 6c067a6e4adc8f2333b749fa3592c2812e711b95 /op_reg_common.h
parent: 5458d9a05ef8545ccbb8a58e670fbede60d10480 (diff)
download: perl-a62b1201c068dc7b099bcb7182e188c4d2fbf34c.tar.gz
Use multi-bit field for regex character set
The /d, /l, and /u regex modifiers are mutually exclusive. This patch changes the field that stores the character set to use more than one bit, with an enum determining which character set is in effect. This data structure more closely follows the semantics of the modifiers being mutually exclusive, conserves bits, and is more easily expandable. A small API is added to set and query the bit field. This patch is not backwards compatible at the .xs source level. A handful of CPAN programs are affected.
Diffstat (limited to 'op_reg_common.h')
-rw-r--r-- op_reg_common.h 48
1 files changed, 43 insertions, 5 deletions
diff --git a/op_reg_common.h b/op_reg_common.h
index 238d7bdb0e..c6d846dd69 100644
--- a/op_reg_common.h
+++ b/op_reg_common.h
@@ -32,8 +32,47 @@
#define RXf_PMf_FOLD (1 << (RXf_PMf_STD_PMMOD_SHIFT+2)) /* /i */
#define RXf_PMf_EXTENDED (1 << (RXf_PMf_STD_PMMOD_SHIFT+3)) /* /x */
#define RXf_PMf_KEEPCOPY (1 << (RXf_PMf_STD_PMMOD_SHIFT+4)) /* /p */
-#define RXf_PMf_LOCALE (1 << (RXf_PMf_STD_PMMOD_SHIFT+5))
-#define RXf_PMf_UNICODE (1 << (RXf_PMf_STD_PMMOD_SHIFT+6))
+
+/* The character set for the regex is stored in a field of more than one bit
+ * using an enum, for reasons of compactness and to ensure that the options are
+ * mutually exclusive */
+typedef enum {
+ REGEX_DEPENDS_CHARSET = 0, /* /d */
+ REGEX_LOCALE_CHARSET, /* /l */
+ REGEX_UNICODE_CHARSET /* /u */
+} regex_charset;
+
+#define _RXf_PMf_CHARSET_SHIFT ((RXf_PMf_STD_PMMOD_SHIFT)+5)
+#define RXf_PMf_CHARSET (3 << (_RXf_PMf_CHARSET_SHIFT)) /* 2 bits */
+
+/* embed.pl doesn't yet know how to handle static inline functions, so
+ manually decorate them here with gcc-style attributes.
+*/
+PERL_STATIC_INLINE void
+set_regex_charset(U32 * const flags, const regex_charset cs)
+    __attribute__nonnull__(1);
+
+PERL_STATIC_INLINE void
+set_regex_charset(U32 * const flags, const regex_charset cs)
+{
+    /* Replace the character-set field of '*flags' with 'cs' (a member of the
+     * regex_charset enum), leaving every other bit of '*flags' as it was */
+    const U32 other_bits = *flags & ~RXf_PMf_CHARSET;
+
+    *flags = other_bits | (cs << _RXf_PMf_CHARSET_SHIFT);
+}
+
+PERL_STATIC_INLINE regex_charset
+get_regex_charset(const U32 flags)
+    __attribute__warn_unused_result__;
+
+PERL_STATIC_INLINE regex_charset
+get_regex_charset(const U32 flags)
+{
+    /* Returns the regex_charset enum member stored in the character-set field
+     * of 'flags'.  The masked-and-shifted value is an integer, so it is cast
+     * explicitly: C converts it to the enum implicitly, but C++ does not, and
+     * the cast keeps this function valid if the header is compiled as C++ */
+
+    return (regex_charset) ((flags & RXf_PMf_CHARSET)
+                            >> _RXf_PMf_CHARSET_SHIFT);
+}
/* Next available bit after the above. Name begins with '_' so won't be
* exported by B */
@@ -41,7 +80,7 @@
/* Mask of the above bits. These need to be transferred from op_pmflags to
* re->extflags during compilation */
-#define RXf_PMf_COMPILETIME (RXf_PMf_MULTILINE|RXf_PMf_SINGLELINE|RXf_PMf_LOCALE|RXf_PMf_FOLD|RXf_PMf_EXTENDED|RXf_PMf_KEEPCOPY|RXf_PMf_UNICODE)
+#define RXf_PMf_COMPILETIME (RXf_PMf_MULTILINE|RXf_PMf_SINGLELINE|RXf_PMf_CHARSET|RXf_PMf_FOLD|RXf_PMf_EXTENDED|RXf_PMf_KEEPCOPY)
/* These copies need to be numerical or defsubs_h.PL won't know about them. */
#define PMf_MULTILINE 1<<0
@@ -49,9 +88,8 @@
#define PMf_FOLD 1<<2
#define PMf_EXTENDED 1<<3
#define PMf_KEEPCOPY 1<<4
-#define PMf_LOCALE 1<<5
-#if PMf_MULTILINE != RXf_PMf_MULTILINE || PMf_SINGLELINE != RXf_PMf_SINGLELINE || PMf_FOLD != RXf_PMf_FOLD || PMf_EXTENDED != RXf_PMf_EXTENDED || PMf_KEEPCOPY != RXf_PMf_KEEPCOPY || PMf_LOCALE != RXf_PMf_LOCALE
+#if PMf_MULTILINE != RXf_PMf_MULTILINE || PMf_SINGLELINE != RXf_PMf_SINGLELINE || PMf_FOLD != RXf_PMf_FOLD || PMf_EXTENDED != RXf_PMf_EXTENDED || PMf_KEEPCOPY != RXf_PMf_KEEPCOPY
# error RXf_PMf defines are wrong
#endif