diff options
author | Tomoyuki Fujimori <moyu@vaporoid.com> | 2013-11-23 10:52:55 -0500 |
---|---|---|
committer | Adrian Thurston <thurston@complang.org> | 2013-11-23 10:52:55 -0500 |
commit | fc18d20f868ee7877a066cffb11e4ed50cb49c14 (patch) | |
tree | 03ac7ab77a8fcd5e29398c22ccdaa213d4762589 /test | |
parent | f77d4e78943a9d62e84968627ed0763a98d69159 (diff) | |
download | colm-fc18d20f868ee7877a066cffb11e4ed50cb49c14.tar.gz |
use ULONG_MAX for 0xff in long int size
Diffstat (limited to 'test')
-rw-r--r-- | test/utf8.lm | 30 |
1 file changed, 30 insertions, 0 deletions
```diff
diff --git a/test/utf8.lm b/test/utf8.lm
new file mode 100644
index 00000000..63962498
--- /dev/null
+++ b/test/utf8.lm
@@ -0,0 +1,30 @@
+##### LM #####
+lex
+	# http://www.ietf.org/rfc/rfc3629.txt
+	rl UTF8_tail / 0x80..0xBF /
+	rl UTF8_1 / 0x00..0x7F /
+	rl UTF8_2 / 0xC2..0xDF UTF8_tail /
+	rl UTF8_3 / 0xE0 0xA0..0xBF UTF8_tail | 0xE1..0xEC UTF8_tail{2} |
+		0xED 0x80..0x9F UTF8_tail | 0xEE..0xEF UTF8_tail{2} /
+	rl UTF8_4 / 0xF0 0x90..0xBF UTF8_tail{2} | 0xF1..0xF3 UTF8_tail{3} |
+		0xF4 0x80..0x8F UTF8_tail{2} /
+	token UTF8_char / UTF8_1 | UTF8_2 | UTF8_3 | UTF8_4 /
+end
+
+global count: int = 0
+
+def utf8_char
+	[UTF8_char] { count = count + 1 }
+
+def utf8_octets
+	[utf8_char*]
+
+parse P: utf8_octets[stdin]
+print(count '\n')
+##### IN #####
+A≢Α.
+한국어
+日本語
+𣎴
+###### EXP ######
+16
```