author    Adrian Thurston <thurston@colm.net>    2019-11-30 10:52:10 -0800
committer Adrian Thurston <thurston@colm.net>    2019-11-30 10:52:10 -0800
commit    479376c313208ee764d212846892cb904b8e5b89 (patch)
tree      e3733bbff990763604f5fd65a9323fdc081ad1d8 /grammar
parent    dac2c1d90de23ca6cb3db88992885b116199a848 (diff)
download  colm-479376c313208ee764d212846892cb904b8e5b89.tar.gz
rust grammar: a proper implementation of raw strings
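
For context, a Rust raw string literal is opened by 'r' followed by zero or
more '#' characters and a '"', and is closed only by a '"' followed by at
least as many '#' characters. The previous grammar enumerated the forms up to
five hashes; this change records the open length at lex time and so handles
any depth. Some literals the grammar must now accept:

    fn main() {
        let plain = r"no escapes: \n stays two characters";
        let one = r#"embedded "quotes" are fine at depth one"#;
        let deep = r######"not even "##### closes a depth-six string"######;
        println!("{plain} {one} {deep}");
    }

The depth-six literal is exactly the case the old enumerated rules could not
match.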
Diffstat (limited to 'grammar')
-rw-r--r--  grammar/rust.lm  49
1 file changed, 41 insertions, 8 deletions
diff --git a/grammar/rust.lm b/grammar/rust.lm
index b5cc7bb6..f6dd8c15 100644
--- a/grammar/rust.lm
+++ b/grammar/rust.lm
@@ -50,20 +50,53 @@ lex
[0-9]+ '.' [0-9]+ float_exponent? |
[0-9]+ ( '.' [0-9]+ )? float_exponent? float_suffix
/
-
- token raw_string /
- 'r"' ( any* :>> '"' ) |
- 'r#"' ( any* :>> '"#' ) |
- 'r##"' ( any* :>> '"##' ) |
- 'r###"' ( any* :>> '"###' ) |
- 'r####"' ( any* :>> '"####' ) |
- 'r#####"' ( any* :>> '"#####' ) /
+
+	# Raw open. The rest is handled in its own lexical region.
+	token raw_open / 'r' '#'* '"' /
+	{
+		# Stash the length (not including the 'r') for comparison against
+		# potential close strings.
+		RawOpenLength = match_length - 1
+		RawOpen: str = input->pull( match_length )
+		input->push( make_token( typeid<raw_open>, RawOpen ) )
+	}
ignore / "//" [^\n]* '\n' /
ignore / "/*" any* :>> "*/" /
ignore / [ \t\n]+ /
end
+# Raw strings.
+def raw_string
+	[raw_open raw_content* raw_close]
+
+global RawOpenLength: int = 0
+
+# Lexical region dedicated to raw strings. It attempts to close the string by
+# matching close candidates and then testing their length against the open.
+lex
+	token raw_close / '"' '#'* /
+	{
+		# Check the length. We use >= to match the close because Rust matches
+		# the close lazily. If the candidate is longer we just chop it, which
+		# will probably result in a parse error.
+		if match_length >= RawOpenLength {
+			# Chop it by using RawOpenLength in the pull from the input.
+			Candidate: str = input->pull( RawOpenLength )
+			input->push( make_token( typeid<raw_close>, Candidate ) )
+		}
+		else {
+			# Too short to close, so just send it as raw content.
+			Candidate: str = input->pull( match_length )
+			input->push( make_token( typeid<raw_content>, Candidate ) )
+		}
+	}
+
+	# Content: send out strings containing no '#' or '"' chars, or single such
+	# chars that are not part of a sequence first matched as a close candidate.
+	token raw_content / [^"#]+ | any /
+end
+
namespace attr
lex
token id / [A-Za-z_] [A-Za-z_0-9]* /
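
The close test in the raw_close action above reduces to an integer comparison
against the stashed open length, which is what makes arbitrary depth work. As
a rough sketch of the same discipline in plain Rust (a hypothetical helper
written for this note, not code from the commit):

    // Hypothetical illustration: mirror the lexer's close test. Given the
    // number of '#' chars that opened a raw string, find where the body ends.
    // A close candidate is a '"' followed by a run of '#'; if the run is at
    // least as long as the open, accept it and "chop" to exactly `hashes`
    // characters, as the grammar does with input->pull( RawOpenLength ).
    fn find_raw_close(body: &str, hashes: usize) -> Option<(usize, usize)> {
        let bytes = body.as_bytes();
        let mut i = 0;
        while i < bytes.len() {
            if bytes[i] == b'"' {
                // Count the '#' run following the quote: the close candidate.
                let mut n = 0;
                while i + 1 + n < bytes.len() && bytes[i + 1 + n] == b'#' {
                    n += 1;
                }
                if n >= hashes {
                    // Body ends at i; the close is '"' plus `hashes` hashes.
                    // Any surplus '#' is left over, likely an error later.
                    return Some((i, i + 1 + hashes));
                }
                // Candidate too short to close: it is ordinary raw content.
                i += 1 + n;
            } else {
                i += 1;
            }
        }
        None
    }

    fn main() {
        // Suppose the input opened with r##" (two hashes). The lone "# inside
        // is too short to close the string, but "## ends it.
        let body = "inside \"# still content\"## rest";
        let (end, past) = find_raw_close(body, 2).unwrap();
        assert_eq!(&body[..end], "inside \"# still content");
        assert_eq!(&body[end..past], "\"##");
    }

The same two outcomes appear in the grammar: a long-enough candidate becomes a
raw_close token chopped to the open length, and a short one is demoted to
raw_content and scanning continues.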