summaryrefslogtreecommitdiff
path: root/grammar
diff options
context:
space:
mode:
author: Adrian Thurston <thurston@colm.net> 2019-11-12 10:04:13 -0300
committer: Adrian Thurston <thurston@colm.net> 2019-11-12 10:04:13 -0300
commit: 9b94ba9d95148826e03c321f2d1a586a5bc27be7 (patch)
tree: 527b982582f24307f7999ee2ae554ec45d58ed63 /grammar
parent: 88f17454d8e03582b81e857c4cbd84aa64f3088d (diff)
download: colm-9b94ba9d95148826e03c321f2d1a586a5bc27be7.tar.gz
ragel pcre: handling backrefs and octals
Diffstat (limited to 'grammar')
-rw-r--r-- grammar/pcre.rl 79
1 files changed, 79 insertions, 0 deletions
diff --git a/grammar/pcre.rl b/grammar/pcre.rl
index df82c3b7..971c4d9f 100644
--- a/grammar/pcre.rl
+++ b/grammar/pcre.rl
@@ -227,6 +227,18 @@ void reverse_term_list( struct term **pphead )
REVERSE_LIST( struct term, pphead );
}
+/*
+ * Decide whether an escaped number that could be read as either an octal
+ * value or a back reference should be treated as a back reference.
+ *
+ * Returns 1 when num is less than 8, or when num does not exceed
+ * num_closed_captures; returns 0 otherwise.
+ */
+int is_back_ref( int num, int num_closed_captures )
+{
+ if ( num < 8 || num <= num_closed_captures )
+ return 1;
+ return 0;
+}
+
+/*
+ * Create a new element of type element_back_ref_type and append it to the
+ * term's element_list.
+ *
+ * NOTE(review): `number` is not used in this body. The element is created
+ * with two NULL arguments, so the referenced group number does not appear to
+ * be recorded on the element -- confirm whether that is intended.
+ */
+void append_back_ref( struct term *term, int number )
+{
+ struct element *el = new_element( element_back_ref_type, NULL, NULL );
+ LIST_APPEND( &term->element_list, el );
+}
/* Provides initial ragel call state stack. */
int *init_ragel_stack( int *size )
@@ -399,10 +411,76 @@ void apply_possessive( struct term *term )
other_char_non_printable
) @append_char;
+ #
+ # Octals and backrefs. An escaped number such as \12 can be read either as
+ # an octal value or as a back reference, so the grammar handles the two
+ # forms together.
+ #
+
+ # Reset the octal accumulator.
+ action init_octal
+ { octal = 0; }
+
+ # Shift the accumulator one base-8 place and add the digit at p.
+ action octal_digit
+ { octal = octal * 8 + ( *p - '0' ); }
+
+ # Decimal number with no leading zero.
+ backref_num = [1-9] [0-9]*;
+
+ # Accepted octal forms: three digits whose first is 0-3, any two octal
+ # digits, or a lone 0.
+ octal_num =
+ [0-3] [0-7] [0-7] |
+ [0-7] [0-7] |
+ [0];
+
+ # Unambiguous octal: append the accumulated value to s_term.
+ action octal { append_element_value( s_term, octal ); }
+
+ # Ambiguous case: both the decimal reading (number) and the octal reading
+ # (octal) are available. is_back_ref() decides from number and
+ # closed_captures; when it declines, the octal value is appended instead.
+ action octal_or_backref
+ {
+ if ( is_back_ref( number, closed_captures ) )
+ append_back_ref( s_term, number );
+ else
+ append_element_value( s_term, octal );
+ }
+
+ # Unambiguous back reference. A number greater than closed_captures is
+ # reported with printf and the machine is stopped with fbreak; otherwise a
+ # back-ref element is appended to s_term.
+ action backref
+ {
+ if ( number > closed_captures ) {
+ printf( "invalid backref: \\%d\n", number );
+ fbreak;
+ }
+
+ append_back_ref( s_term, number );
+ }
+
+ # Definitely octal: every octal form except those that also match the
+ # backref number pattern.
+ def_octal =
+ '\\' @init_octal
+ ( octal_num - backref_num ) $octal_digit;
+
+ # All cases that can be either octal or backref, where the backref number
+ # must be inspected to decide. The pattern is the intersection of the two
+ # number forms, and both the octal and the decimal value are accumulated.
+ # (init_number and decimal_digit are not defined in this section.)
+ octal_or_backref =
+ '\\' @init_octal @init_number
+ ( octal_num & backref_num ) $octal_digit $decimal_digit;
+
+ # Definitely backref: the backref number pattern, excluding anything with
+ # a prefix that could be octal.
+ def_backref =
+ '\\' @init_number
+ ( backref_num - ( octal_num any* ) ) $decimal_digit;
+
+ # NOTE(review): octal_or_backref (with its leaving action) is embedded in
+ # both `octal` and `backref` below, and both are alternatives of `atom` --
+ # confirm the action cannot run twice for a single ambiguous escape.
+ octal =
+ def_octal %octal |
+ octal_or_backref %octal_or_backref;
+
+ backref =
+ def_backref %backref |
+ octal_or_backref %octal_or_backref;
+
+ # An atom is one of: a literal, a char_class_end character (appended via
+ # append_char), a dot, an escaped octal value, a back reference, or an
+ # opening paren, which calls into open_paren_forms.
atom =
literal |
char_class_end @append_char |
dot @{ append_element_value( s_term, dot ); } |
+ octal |
+ backref |
open_paren @{ fcall open_paren_forms; }
;
@@ -489,6 +567,7 @@ int pcre_parse( struct pattern **result_pattern, char *line, int len )
int result = 1;
int number = 0;
+ int octal = 0;
int quant_min, quant_max;
struct pattern *s_pattern = 0;