summaryrefslogtreecommitdiff
path: root/op_reg_common.h
blob: c3f3e7fefcebf14d7ad5501d7f9dd9622a3a156d (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
/*    op_reg_common.h
 *
 *    Definitions common to by op.h and regexp.h
 *
 *    Copyright (C) 2010, 2011 by Larry Wall and others
 *
 *    You may distribute under the terms of either the GNU General Public
 *    License or the Artistic License, as specified in the README file.
 *
 */

/* These defines are used in both op.h and regexp.h  The definitions use the
 * shift form so that ext/B/Makefile.PL will pick them up.
 *
 * Data structures used in the two headers have common fields, and in fact one
 * is copied onto the other.  This makes it easy to keep them in sync */

/* This tells where the first of these bits is.  Setting it to 0 saved cycles
 * and memory.  I (khw) think the code will work if changed back, but haven't
 * tested it */
/* Make sure to update ext/re/re.pm when changing this! */
#ifndef RXf_PMf_STD_PMMOD_SHIFT /* Only expand #include of this file once */

#define RXf_PMf_STD_PMMOD_SHIFT 0

/* The bits need to be ordered so that the msix are contiguous starting at bit
 * RXf_PMf_STD_PMMOD_SHIFT, followed by the p.  See STD_PAT_MODS and
 * INT_PAT_MODS in regexp.h for the reason contiguity is needed */
/* Make sure to update lib/re.pm when changing these! */
/* Make sure you keep the pure PMf_ versions below in sync */
#define RXf_PMf_MULTILINE      (1U << (RXf_PMf_STD_PMMOD_SHIFT+0))    /* /m */
#define RXf_PMf_SINGLELINE     (1U << (RXf_PMf_STD_PMMOD_SHIFT+1))    /* /s */
#define RXf_PMf_FOLD           (1U << (RXf_PMf_STD_PMMOD_SHIFT+2))    /* /i */
#define RXf_PMf_EXTENDED       (1U << (RXf_PMf_STD_PMMOD_SHIFT+3))    /* /x */
#define RXf_PMf_EXTENDED_MORE  (1U << (RXf_PMf_STD_PMMOD_SHIFT+4))    /* /xx */
#define RXf_PMf_KEEPCOPY       (1U << (RXf_PMf_STD_PMMOD_SHIFT+5))    /* /p */

/* The character set for the regex is stored in a field of more than one bit
 * using an enum, for reasons of compactness and to ensure that the options are
 * mutually exclusive */
/* Make sure to update ext/re/re.pm and regcomp.sym (as these are used as
 * offsets for various node types, like POSIXD vs POSIXL, etc) when changing
 * this! */
typedef enum {
    REGEX_DEPENDS_CHARSET = 0,
    REGEX_LOCALE_CHARSET,
    REGEX_UNICODE_CHARSET,
    REGEX_ASCII_RESTRICTED_CHARSET,
    REGEX_ASCII_MORE_RESTRICTED_CHARSET
} regex_charset;

#define _RXf_PMf_CHARSET_SHIFT ((RXf_PMf_STD_PMMOD_SHIFT)+6)
#define RXf_PMf_CHARSET (7U << (_RXf_PMf_CHARSET_SHIFT)) /* 3 bits */

/* Manually decorate these functions here with gcc-style attributes just to
 * avoid making the regex_charset typedef global, which it would need to be for
 * proto.h to understand it */
PERL_STATIC_INLINE void
set_regex_charset(U32 * const flags, const regex_charset cs)
    __attribute__nonnull__(1);

PERL_STATIC_INLINE void
set_regex_charset(U32 * const flags, const regex_charset cs)
{
    /* Sets the character set portion of 'flags' to 'cs', which is a member of
     * the above enum */

    *flags &= ~RXf_PMf_CHARSET;
    *flags |= (cs << _RXf_PMf_CHARSET_SHIFT);
}

PERL_STATIC_INLINE regex_charset
get_regex_charset(const U32 flags)
    __attribute__warn_unused_result__;

PERL_STATIC_INLINE regex_charset
get_regex_charset(const U32 flags)
{
    /* Returns the enum corresponding to the character set in 'flags' */

    return (regex_charset) ((flags & RXf_PMf_CHARSET) >> _RXf_PMf_CHARSET_SHIFT);
}

#define _RXf_PMf_SHIFT_COMPILETIME (RXf_PMf_STD_PMMOD_SHIFT+9)

/*
  Set in Perl_pmruntime if op_flags & OPf_SPECIAL, i.e. split. Will
  be used by regex engines to check whether they should set
  RXf_SKIPWHITE
*/
#define RXf_PMf_SPLIT (1U<<(RXf_PMf_STD_PMMOD_SHIFT+9))

/* Next available bit after the above.  Name begins with '_' so won't be
 * exported by B */
#define _RXf_PMf_SHIFT_NEXT (RXf_PMf_STD_PMMOD_SHIFT+10)

/* Mask of the above bits.  These need to be transferred from op_pmflags to
 * re->extflags during compilation */
#define RXf_PMf_COMPILETIME    (RXf_PMf_MULTILINE|RXf_PMf_SINGLELINE|RXf_PMf_FOLD|RXf_PMf_EXTENDED|RXf_PMf_EXTENDED_MORE|RXf_PMf_KEEPCOPY|RXf_PMf_CHARSET)
#define RXf_PMf_FLAGCOPYMASK   (RXf_PMf_COMPILETIME|RXf_PMf_SPLIT)

#if 0   /* Temporary to get Jenkins happy again */
    /* Exclude win32 because it can't cope with I32_MAX definition */
#ifndef WIN32
#   if RXf_PMf_COMPILETIME > I32_MAX
#     error RXf_PMf_COMPILETIME wont fit in arg2 field of eval node
#   endif
#endif
#endif

/* These copies need to be numerical or ext/B/Makefile.PL won't think they are
 * constants */
#define PMf_MULTILINE     (1U<<0)
#define PMf_SINGLELINE    (1U<<1)
#define PMf_FOLD          (1U<<2)
#define PMf_EXTENDED      (1U<<3)
#define PMf_EXTENDED_MORE (1U<<4)
#define PMf_KEEPCOPY      (1U<<5)
#define PMf_CHARSET       (7U<<6)
#define PMf_SPLIT         (1U<<9)

#if PMf_MULTILINE != RXf_PMf_MULTILINE || PMf_SINGLELINE != RXf_PMf_SINGLELINE || PMf_FOLD != RXf_PMf_FOLD || PMf_EXTENDED != RXf_PMf_EXTENDED || PMf_EXTENDED_MORE != RXf_PMf_EXTENDED_MORE || PMf_KEEPCOPY != RXf_PMf_KEEPCOPY || PMf_SPLIT != RXf_PMf_SPLIT || PMf_CHARSET != RXf_PMf_CHARSET
#   error RXf_PMf defines are wrong
#endif

/*  Error check that haven't left something out of this.  This isn't done
 *  directly in the #define because doing so confuses regcomp.pl.
 *  (2**n - 1) is n 1 bits, so the below gets the contiguous bits between the
 *  beginning and ending shifts */
#if RXf_PMf_COMPILETIME != (((1 << (_RXf_PMf_SHIFT_COMPILETIME))-1) \
                            & (~((1 << RXf_PMf_STD_PMMOD_SHIFT)-1)))
#   error RXf_PMf_COMPILETIME is invalid
#endif

#endif /* Include only once */