From fc18d20f868ee7877a066cffb11e4ed50cb49c14 Mon Sep 17 00:00:00 2001
From: Tomoyuki Fujimori
Date: Sat, 23 Nov 2013 10:52:55 -0500
Subject: use ULONG_MAX for 0xff in long int size

---
 test/utf8.lm | 30 ++++++++++++++++++++++++++++++
 1 file changed, 30 insertions(+)
 create mode 100644 test/utf8.lm

(limited to 'test')

Reviewer notes (free text placed before the first "diff --git" line; not part
of any hunk, so the 30-line payload below is unchanged):

  * This view is limited to the 'test' directory, so only the new test file
    test/utf8.lm is shown; nothing matching the Subject line is visible here.
  * The file has three sections, introduced by the "LM", "IN" and "EXP"
    marker lines: a program, its input, and the expected output.
  * Program: the lex region names byte-range patterns (UTF8_tail, UTF8_1 ..
    UTF8_4) following the grammar in the RFC 3629 document it cites, and
    defines one token, UTF8_char, as any one of the four sequence forms.
    The block attached to the [UTF8_char] production adds one to the global
    "count"; the program parses stdin as utf8_char* and prints count
    followed by a newline.
  * NOTE(review): the IN section as visible here holds 15 code points when
    each line feed is counted (5 + 4 + 4 + 2), while EXP is 16. Possibly an
    invisible character (e.g. U+FEFF before the last input character) was
    lost in transcription -- TODO confirm against blob 63962498.
  * NOTE(review): leading whitespace inside the payload lines was
    reconstructed; confirm the exact indentation against the blob as well.

diff --git a/test/utf8.lm b/test/utf8.lm
new file mode 100644
index 00000000..63962498
--- /dev/null
+++ b/test/utf8.lm
@@ -0,0 +1,30 @@
+##### LM #####
+lex
+    # http://www.ietf.org/rfc/rfc3629.txt
+    rl UTF8_tail / 0x80..0xBF /
+    rl UTF8_1 / 0x00..0x7F /
+    rl UTF8_2 / 0xC2..0xDF UTF8_tail /
+    rl UTF8_3 / 0xE0 0xA0..0xBF UTF8_tail | 0xE1..0xEC UTF8_tail{2} |
+        0xED 0x80..0x9F UTF8_tail | 0xEE..0xEF UTF8_tail{2} /
+    rl UTF8_4 / 0xF0 0x90..0xBF UTF8_tail{2} | 0xF1..0xF3 UTF8_tail{3} |
+        0xF4 0x80..0x8F UTF8_tail{2} /
+    token UTF8_char / UTF8_1 | UTF8_2 | UTF8_3 | UTF8_4 /
+end
+
+global count: int = 0
+
+def utf8_char
+    [UTF8_char] { count = count + 1 }
+
+def utf8_octets
+    [utf8_char*]
+
+parse P: utf8_octets[stdin]
+print(count '\n')
+##### IN #####
+A≢Α.
+한국어
+日本語
+𣎴
+###### EXP ######
+16
-- 
cgit v1.2.1