summary | refs | log | tree | commit | diff
path: root/test
diff options
context:
space:
mode:
author: Tomoyuki Fujimori <moyu@vaporoid.com> 2013-11-23 10:52:55 -0500
committer: Adrian Thurston <thurston@complang.org> 2013-11-23 10:52:55 -0500
commit: fc18d20f868ee7877a066cffb11e4ed50cb49c14 (patch)
tree: 03ac7ab77a8fcd5e29398c22ccdaa213d4762589 /test
parent: f77d4e78943a9d62e84968627ed0763a98d69159 (diff)
download: colm-fc18d20f868ee7877a066cffb11e4ed50cb49c14.tar.gz
use ULONG_MAX for 0xff in long int size
Diffstat (limited to 'test')
-rw-r--r-- test/utf8.lm 30
1 files changed, 30 insertions, 0 deletions
diff --git a/test/utf8.lm b/test/utf8.lm
new file mode 100644
index 00000000..63962498
--- /dev/null
+++ b/test/utf8.lm
@@ -0,0 +1,30 @@
+##### LM #####
+lex  # byte-level scanner for well-formed UTF-8; the patterns below follow the RFC 3629 grammar
+ # http://www.ietf.org/rfc/rfc3629.txt
+ rl UTF8_tail / 0x80..0xBF /  # continuation byte
+ rl UTF8_1 / 0x00..0x7F /  # one-byte sequence
+ rl UTF8_2 / 0xC2..0xDF UTF8_tail /  # two-byte sequence; lead bytes 0xC0 and 0xC1 are not accepted
+ rl UTF8_3 / 0xE0 0xA0..0xBF UTF8_tail | 0xE1..0xEC UTF8_tail{2} |
+ 0xED 0x80..0x9F UTF8_tail | 0xEE..0xEF UTF8_tail{2} /  # three-byte sequence; the second byte is narrowed after 0xE0 (no overlong forms) and after 0xED (no UTF-16 surrogates)
+ rl UTF8_4 / 0xF0 0x90..0xBF UTF8_tail{2} | 0xF1..0xF3 UTF8_tail{3} |
+ 0xF4 0x80..0x8F UTF8_tail{2} /  # four-byte sequence; the second byte is narrowed after 0xF0 and 0xF4, and lead bytes above 0xF4 are not accepted
+ token UTF8_char / UTF8_1 | UTF8_2 | UTF8_3 | UTF8_4 /  # the only token in this region: one encoded character of any of the four lengths
+end
+
+global count: int = 0  # tally incremented by the utf8_char production and printed after the parse
+
+def utf8_char  # wraps a single UTF8_char token
+ [UTF8_char] { count = count + 1 }  # attached block adds 1 to the global count; presumably run once per matched character
+
+def utf8_octets  # root of the grammar, used as the parse target below
+ [utf8_char*]  # zero or more utf8_char items
+
+parse P: utf8_octets[stdin]  # parse standard input as utf8_octets and bind the result to P
+print(count '\n')  # write the final tally followed by a newline
+##### IN #####
+A≢Α.
+한국어
+日本語
+𣎴
+###### EXP ######
+16