summaryrefslogtreecommitdiff
path: root/grammar
diff options
context:
space:
mode:
author Adrian Thurston <thurston@colm.net> 2019-11-11 08:35:39 -0300
committer Adrian Thurston <thurston@colm.net> 2019-11-11 08:35:39 -0300
commit 488e2eb3e841c051ca01ee0eeb9fffe5fbae1c01 (patch)
tree 238add7a6d0fe1a58db33e6f7a6962d924a6cfd5 /grammar
parent 21911073a0461412c0e9f2586fa11617cdeed195 (diff)
download colm-488e2eb3e841c051ca01ee0eeb9fffe5fbae1c01.tar.gz
ragel pcre: added some basic literal forms
Diffstat (limited to 'grammar')
-rw-r--r-- grammar/pcre.rl 62
1 files changed, 59 insertions, 3 deletions
diff --git a/grammar/pcre.rl b/grammar/pcre.rl
index 2a6191fe..4dfe0f80 100644
--- a/grammar/pcre.rl
+++ b/grammar/pcre.rl
@@ -39,6 +39,11 @@ struct value
struct value *next;
};
+enum ValueType
+{
+ Dot = 256,
+};
+
struct quantifier
{
@@ -280,9 +285,60 @@ int *grow_ragel_stack( int *size, int *stack )
quant_forms = '_';
+ alpha_char = [a-zA-Z];
+ digit_char = [0-9];
+
+ open_paren = '(';
+ close_paren = ')';
+
+ char_class_start = '[';
+ char_class_end = ']';
+
+ ampersand = '&';
+ colon = ':';
+ comma = ',';
+ dollar = '$';
+ dot = '.';
+ equals = '=';
+ exclamation = '!';
+ greater_than = '>';
+ hash = '#';
+ hyphen = '-';
+ less_than = '<';
+ pipe = '|';
+ single_quote = "'";
+ underscore = '_';
+
+ other_char_printable =
+ ' ' | '~' | ';' | '@' | '%' | '`' | '"' | '/';
+
+ other_char_non_printable = ^( 0 .. 127 );
+
+ capture_non_capture =
+ '(' @{ fcall paren_open; };
+
+ literal_sym =
+ comma | hyphen | less_than |
+ greater_than | single_quote | underscore | colon |
+ hash | equals | exclamation | ampersand;
+
+ action append_char {
+ append_element_value( s_term, *p );
+ }
+
+ literal = (
+ alpha_char |
+ digit_char |
+ literal_sym |
+ other_char_printable |
+ other_char_non_printable
+ ) @append_char;
+
atom =
- [a-z] @{ append_element_value( s_term, *p ); } |
- '(' @{ fcall paren_open; }
+ literal |
+ char_class_end @append_char |
+ dot @{ append_element_value( s_term, Dot ); } |
+ open_paren @{ fcall open_paren_forms; }
;
non_greedy = '_';
@@ -303,7 +359,7 @@ int *grow_ragel_stack( int *size, int *stack )
#
regex = expr;
- paren_open :=
+ open_paren_forms :=
# Look at the first few characters to see what the form is. What we
# handle here:
# (re) capturing parens