summaryrefslogtreecommitdiff
path: root/testsuite/tests/parser/unicode
diff options
context:
space:
mode:
Diffstat (limited to 'testsuite/tests/parser/unicode')
-rw-r--r--testsuite/tests/parser/unicode/1103.hs13
-rw-r--r--testsuite/tests/parser/unicode/1744.hs3
-rw-r--r--testsuite/tests/parser/unicode/1744.stdout1
-rw-r--r--testsuite/tests/parser/unicode/2302.hs1
-rw-r--r--testsuite/tests/parser/unicode/2302.stderr2
-rw-r--r--testsuite/tests/parser/unicode/4373.hs3
-rw-r--r--testsuite/tests/parser/unicode/Makefile3
-rw-r--r--testsuite/tests/parser/unicode/all.T22
-rw-r--r--testsuite/tests/parser/unicode/utf8_001.hs2
-rw-r--r--testsuite/tests/parser/unicode/utf8_001.stderr0
-rw-r--r--testsuite/tests/parser/unicode/utf8_002.hs2
-rw-r--r--testsuite/tests/parser/unicode/utf8_002.stderr2
-rw-r--r--testsuite/tests/parser/unicode/utf8_003.hs2
-rw-r--r--testsuite/tests/parser/unicode/utf8_003.stderr2
-rw-r--r--testsuite/tests/parser/unicode/utf8_004.hs2
-rw-r--r--testsuite/tests/parser/unicode/utf8_004.stderr2
-rw-r--r--testsuite/tests/parser/unicode/utf8_005.hs2
-rw-r--r--testsuite/tests/parser/unicode/utf8_005.stderr2
-rw-r--r--testsuite/tests/parser/unicode/utf8_010.hs2
-rw-r--r--testsuite/tests/parser/unicode/utf8_010.stderr3
-rw-r--r--testsuite/tests/parser/unicode/utf8_011.hs2
-rw-r--r--testsuite/tests/parser/unicode/utf8_011.stderr3
-rw-r--r--testsuite/tests/parser/unicode/utf8_020.hs2
-rw-r--r--testsuite/tests/parser/unicode/utf8_020.stderr3
-rw-r--r--testsuite/tests/parser/unicode/utf8_021.hs2
-rw-r--r--testsuite/tests/parser/unicode/utf8_021.stderr3
-rw-r--r--testsuite/tests/parser/unicode/utf8_022.hs2
-rw-r--r--testsuite/tests/parser/unicode/utf8_022.stderr3
-rw-r--r--testsuite/tests/parser/unicode/utf8_023.hs2
-rw-r--r--testsuite/tests/parser/unicode/utf8_023.stderr0
-rw-r--r--testsuite/tests/parser/unicode/utf8_024.hs194
-rw-r--r--testsuite/tests/parser/unicode/utf8_024.stdout1
32 files changed, 288 insertions, 0 deletions
diff --git a/testsuite/tests/parser/unicode/1103.hs b/testsuite/tests/parser/unicode/1103.hs
new file mode 100644
index 0000000000..6d10064056
--- /dev/null
+++ b/testsuite/tests/parser/unicode/1103.hs
@@ -0,0 +1,13 @@
+{-# LANGUAGE UnicodeSyntax #-}
+{-
+ Three kinds of Unicode tests for our purposes.
+-}
+
+module UniTest where
+
+-- Japanese Unicode test (originally non-working; all.T expects it to compile).
+
+てすと3 ∷ IO ()
+てすと3 = do
+ putStrLn $ show 人間虫 where
+ 人間虫 = "humasect"
diff --git a/testsuite/tests/parser/unicode/1744.hs b/testsuite/tests/parser/unicode/1744.hs
new file mode 100644
index 0000000000..90273741da
--- /dev/null
+++ b/testsuite/tests/parser/unicode/1744.hs
@@ -0,0 +1,3 @@
+main = print hello
+-- test that layout has not been screwed up
+hello = "こんにちは 世界"
diff --git a/testsuite/tests/parser/unicode/1744.stdout b/testsuite/tests/parser/unicode/1744.stdout
new file mode 100644
index 0000000000..f127f8d21d
--- /dev/null
+++ b/testsuite/tests/parser/unicode/1744.stdout
@@ -0,0 +1 @@
+"\12371\12435\12395\12385\12399 \19990\30028"
diff --git a/testsuite/tests/parser/unicode/2302.hs b/testsuite/tests/parser/unicode/2302.hs
new file mode 100644
index 0000000000..c40c704cc3
--- /dev/null
+++ b/testsuite/tests/parser/unicode/2302.hs
@@ -0,0 +1 @@
+f = À
diff --git a/testsuite/tests/parser/unicode/2302.stderr b/testsuite/tests/parser/unicode/2302.stderr
new file mode 100644
index 0000000000..608c9ef0bd
--- /dev/null
+++ b/testsuite/tests/parser/unicode/2302.stderr
@@ -0,0 +1,2 @@
+
+2302.hs:1:5: Not in scope: data constructor `À'
diff --git a/testsuite/tests/parser/unicode/4373.hs b/testsuite/tests/parser/unicode/4373.hs
new file mode 100644
index 0000000000..a753432a41
--- /dev/null
+++ b/testsuite/tests/parser/unicode/4373.hs
@@ -0,0 +1,3 @@
+module ShouldCompile where
+
+test = let v₂ = (+) in v₂ 1 3
diff --git a/testsuite/tests/parser/unicode/Makefile b/testsuite/tests/parser/unicode/Makefile
new file mode 100644
index 0000000000..9101fbd40a
--- /dev/null
+++ b/testsuite/tests/parser/unicode/Makefile
@@ -0,0 +1,3 @@
+TOP=../../..
+include $(TOP)/mk/boilerplate.mk
+include $(TOP)/mk/test.mk
diff --git a/testsuite/tests/parser/unicode/all.T b/testsuite/tests/parser/unicode/all.T
new file mode 100644
index 0000000000..e5375a361e
--- /dev/null
+++ b/testsuite/tests/parser/unicode/all.T
@@ -0,0 +1,22 @@
+# Test that we catch UTF-8 decoding errors: every file in this group must fail to compile. NOTE(review): utf8_001 and utf8_023 are added alongside these but are not registered here - confirm that is intended.
+
+test('utf8_002', normal, compile_fail, [''])
+test('utf8_003', normal, compile_fail, [''])
+test('utf8_004', normal, compile_fail, [''])
+test('utf8_005', normal, compile_fail, [''])
+
+test('utf8_010', normal, compile_fail, [''])
+test('utf8_011', normal, compile_fail, [''])
+
+test('utf8_020', normal, compile_fail, [''])
+test('utf8_021', normal, compile_fail, [''])
+test('utf8_022', normal, compile_fail, [''])
+
+# Test that we can understand Unicode characters in lexemes (identifiers, operators, string literals).
+
+test('utf8_024', normal, compile_and_run, [''])
+
+test('1744', normal, compile_and_run, [''])
+test('1103', normal, compile, [''])
+test('2302', only_ways(['normal']), compile_fail, [''])
+test('4373', normal, compile, [''])
diff --git a/testsuite/tests/parser/unicode/utf8_001.hs b/testsuite/tests/parser/unicode/utf8_001.hs
new file mode 100644
index 0000000000..371e89e1fa
--- /dev/null
+++ b/testsuite/tests/parser/unicode/utf8_001.hs
@@ -0,0 +1,2 @@
+-- 0x80 is an invalid character
+bad = ''
diff --git a/testsuite/tests/parser/unicode/utf8_001.stderr b/testsuite/tests/parser/unicode/utf8_001.stderr
new file mode 100644
index 0000000000..e69de29bb2
--- /dev/null
+++ b/testsuite/tests/parser/unicode/utf8_001.stderr
diff --git a/testsuite/tests/parser/unicode/utf8_002.hs b/testsuite/tests/parser/unicode/utf8_002.hs
new file mode 100644
index 0000000000..589da832f2
--- /dev/null
+++ b/testsuite/tests/parser/unicode/utf8_002.hs
@@ -0,0 +1,2 @@
+-- buffer ends in 0xC0
+ \ No newline at end of file
diff --git a/testsuite/tests/parser/unicode/utf8_002.stderr b/testsuite/tests/parser/unicode/utf8_002.stderr
new file mode 100644
index 0000000000..d8083f0c89
--- /dev/null
+++ b/testsuite/tests/parser/unicode/utf8_002.stderr
@@ -0,0 +1,2 @@
+
+utf8_002.hs:2:1: lexical error (UTF-8 decoding error)
diff --git a/testsuite/tests/parser/unicode/utf8_003.hs b/testsuite/tests/parser/unicode/utf8_003.hs
new file mode 100644
index 0000000000..bd8e2f552c
--- /dev/null
+++ b/testsuite/tests/parser/unicode/utf8_003.hs
@@ -0,0 +1,2 @@
+-- buffer ends in 0xD0
+ \ No newline at end of file
diff --git a/testsuite/tests/parser/unicode/utf8_003.stderr b/testsuite/tests/parser/unicode/utf8_003.stderr
new file mode 100644
index 0000000000..be433d9141
--- /dev/null
+++ b/testsuite/tests/parser/unicode/utf8_003.stderr
@@ -0,0 +1,2 @@
+
+utf8_003.hs:2:1: lexical error (UTF-8 decoding error)
diff --git a/testsuite/tests/parser/unicode/utf8_004.hs b/testsuite/tests/parser/unicode/utf8_004.hs
new file mode 100644
index 0000000000..6a1a839246
--- /dev/null
+++ b/testsuite/tests/parser/unicode/utf8_004.hs
@@ -0,0 +1,2 @@
+-- buffer ends in 0xE0
+ \ No newline at end of file
diff --git a/testsuite/tests/parser/unicode/utf8_004.stderr b/testsuite/tests/parser/unicode/utf8_004.stderr
new file mode 100644
index 0000000000..aff8256549
--- /dev/null
+++ b/testsuite/tests/parser/unicode/utf8_004.stderr
@@ -0,0 +1,2 @@
+
+utf8_004.hs:2:1: lexical error (UTF-8 decoding error)
diff --git a/testsuite/tests/parser/unicode/utf8_005.hs b/testsuite/tests/parser/unicode/utf8_005.hs
new file mode 100644
index 0000000000..e88fec5a4f
--- /dev/null
+++ b/testsuite/tests/parser/unicode/utf8_005.hs
@@ -0,0 +1,2 @@
+-- buffer ends in 0xF0
+ \ No newline at end of file
diff --git a/testsuite/tests/parser/unicode/utf8_005.stderr b/testsuite/tests/parser/unicode/utf8_005.stderr
new file mode 100644
index 0000000000..3d551bae44
--- /dev/null
+++ b/testsuite/tests/parser/unicode/utf8_005.stderr
@@ -0,0 +1,2 @@
+
+utf8_005.hs:2:1: lexical error (UTF-8 decoding error)
diff --git a/testsuite/tests/parser/unicode/utf8_010.hs b/testsuite/tests/parser/unicode/utf8_010.hs
new file mode 100644
index 0000000000..371e89e1fa
--- /dev/null
+++ b/testsuite/tests/parser/unicode/utf8_010.hs
@@ -0,0 +1,2 @@
+-- 0x80 is an invalid character
+bad = ''
diff --git a/testsuite/tests/parser/unicode/utf8_010.stderr b/testsuite/tests/parser/unicode/utf8_010.stderr
new file mode 100644
index 0000000000..a2bb9b52e2
--- /dev/null
+++ b/testsuite/tests/parser/unicode/utf8_010.stderr
@@ -0,0 +1,3 @@
+
+utf8_010.hs:2:8:
+ lexical error in string/character literal (UTF-8 decoding error)
diff --git a/testsuite/tests/parser/unicode/utf8_011.hs b/testsuite/tests/parser/unicode/utf8_011.hs
new file mode 100644
index 0000000000..5700e1db45
--- /dev/null
+++ b/testsuite/tests/parser/unicode/utf8_011.hs
@@ -0,0 +1,2 @@
+-- 0xbf is an invalid character
+bad = ''
diff --git a/testsuite/tests/parser/unicode/utf8_011.stderr b/testsuite/tests/parser/unicode/utf8_011.stderr
new file mode 100644
index 0000000000..0b34980303
--- /dev/null
+++ b/testsuite/tests/parser/unicode/utf8_011.stderr
@@ -0,0 +1,3 @@
+
+utf8_011.hs:2:8:
+ lexical error in string/character literal (UTF-8 decoding error)
diff --git a/testsuite/tests/parser/unicode/utf8_020.hs b/testsuite/tests/parser/unicode/utf8_020.hs
new file mode 100644
index 0000000000..eaefe622fa
--- /dev/null
+++ b/testsuite/tests/parser/unicode/utf8_020.hs
@@ -0,0 +1,2 @@
+-- A start sequence byte (0xC0) followed by an invalid continuation:
+bad = "."
diff --git a/testsuite/tests/parser/unicode/utf8_020.stderr b/testsuite/tests/parser/unicode/utf8_020.stderr
new file mode 100644
index 0000000000..7254106142
--- /dev/null
+++ b/testsuite/tests/parser/unicode/utf8_020.stderr
@@ -0,0 +1,3 @@
+
+utf8_020.hs:2:8:
+ lexical error in string/character literal (UTF-8 decoding error)
diff --git a/testsuite/tests/parser/unicode/utf8_021.hs b/testsuite/tests/parser/unicode/utf8_021.hs
new file mode 100644
index 0000000000..639e0bfc63
--- /dev/null
+++ b/testsuite/tests/parser/unicode/utf8_021.hs
@@ -0,0 +1,2 @@
+-- A start sequence byte (0xE0) followed by an invalid continuation:
+bad = "."
diff --git a/testsuite/tests/parser/unicode/utf8_021.stderr b/testsuite/tests/parser/unicode/utf8_021.stderr
new file mode 100644
index 0000000000..2867239846
--- /dev/null
+++ b/testsuite/tests/parser/unicode/utf8_021.stderr
@@ -0,0 +1,3 @@
+
+utf8_021.hs:2:8:
+ lexical error in string/character literal (UTF-8 decoding error)
diff --git a/testsuite/tests/parser/unicode/utf8_022.hs b/testsuite/tests/parser/unicode/utf8_022.hs
new file mode 100644
index 0000000000..6484a03c40
--- /dev/null
+++ b/testsuite/tests/parser/unicode/utf8_022.hs
@@ -0,0 +1,2 @@
+-- A start sequence byte (0xE0) followed by an invalid continuation:
+bad = "."
diff --git a/testsuite/tests/parser/unicode/utf8_022.stderr b/testsuite/tests/parser/unicode/utf8_022.stderr
new file mode 100644
index 0000000000..3f84d06de2
--- /dev/null
+++ b/testsuite/tests/parser/unicode/utf8_022.stderr
@@ -0,0 +1,3 @@
+
+utf8_022.hs:2:8:
+ lexical error in string/character literal (UTF-8 decoding error)
diff --git a/testsuite/tests/parser/unicode/utf8_023.hs b/testsuite/tests/parser/unicode/utf8_023.hs
new file mode 100644
index 0000000000..255d48b741
--- /dev/null
+++ b/testsuite/tests/parser/unicode/utf8_023.hs
@@ -0,0 +1,2 @@
+-- some incomplete sequences concatenated
+bad = ""
diff --git a/testsuite/tests/parser/unicode/utf8_023.stderr b/testsuite/tests/parser/unicode/utf8_023.stderr
new file mode 100644
index 0000000000..e69de29bb2
--- /dev/null
+++ b/testsuite/tests/parser/unicode/utf8_023.stderr
diff --git a/testsuite/tests/parser/unicode/utf8_024.hs b/testsuite/tests/parser/unicode/utf8_024.hs
new file mode 100644
index 0000000000..1e491f75ec
--- /dev/null
+++ b/testsuite/tests/parser/unicode/utf8_024.hs
@@ -0,0 +1,194 @@
+{-# LANGUAGE ExplicitForAll, UnicodeSyntax #-}
+{-
+Test for valid unicode identifiers
+
+*** This file is UTF-8 encoded.
+
+*** BE CAREFUL WHEN EDITING THIS FILE WITH EMACS. Emacs' UTF-8 engine
+ has several times got the encoding wrong for me and inserted bogus
+ bytes, especially in the 4-byte characters. Edit the file literally
+ (M-x find-file-literally). By all means view it in Emacs' UTF-8
+ mode (C-x RET c utf-8, C-x C-f utf8_024.hs), but don't edit and save.
+
+Here's a selection of characters I pulled from UnicodeData.txt that we
+can use to test with:
+
+-- upper/lower case letters
+À LATIN CAPITAL LETTER A WITH GRAVE;Lu;0;L;0041 0300;;;;N;LATIN CAPITAL LETTER A GRAVE;;;00E0;
+à LATIN SMALL LETTER A WITH GRAVE;Ll;0;L;0061 0300;;;;N;LATIN SMALL LETTER A GRAVE;;00C0;;00C0
+
+Α GREEK CAPITAL LETTER ALPHA;Lu;0;L;;;;;N;;;;03B1;
+α GREEK SMALL LETTER ALPHA;Ll;0;L;;;;;N;;;0391;;0391
+α GREEK SMALL LETTER ALPHA;Ll;0;L;;;;;N;;;0391;;0391
+β GREEK SMALL LETTER BETA;Ll;0;L;;;;;N;;;0392;;0392
+γ GREEK SMALL LETTER GAMMA;Ll;0;L;;;;;N;;;0393;;0393
+δ GREEK SMALL LETTER DELTA;Ll;0;L;;;;;N;;;0394;;0394
+
+Ⴀ GEORGIAN CAPITAL LETTER AN;Lu;0;L;;;;;N;;Khutsuri;;;
+ა GEORGIAN LETTER AN;Lo;0;L;;;;;N;GEORGIAN SMALL LETTER AN;;;;
+
+Ϣ COPTIC CAPITAL LETTER SHEI;Lu;0;L;;;;;N;GREEK CAPITAL LETTER SHEI;;;03E3;
+ϣ COPTIC SMALL LETTER SHEI;Ll;0;L;;;;;N;GREEK SMALL LETTER SHEI;;03E2;;03E2
+
+А CYRILLIC CAPITAL LETTER A;Lu;0;L;;;;;N;;;;0430;
+а CYRILLIC SMALL LETTER A;Ll;0;L;;;;;N;;;0410;;0410
+
+Ա ARMENIAN CAPITAL LETTER AYB;Lu;0;L;;;;;N;;;;0561;
+ա ARMENIAN SMALL LETTER AYB;Ll;0;L;;;;;N;;;0531;;0531
+
+𝐴 MATHEMATICAL ITALIC CAPITAL A;Lu;0;L;<font> 0041;;;;N;;;;;
+𝑎 MATHEMATICAL ITALIC SMALL A;Ll;0;L;<font> 0061;;;;N;;;;;
+
+𝔸 MATHEMATICAL DOUBLE-STRUCK CAPITAL A;Lu;0;L;<font> 0041;;;;N;;;;;
+𝕒 MATHEMATICAL DOUBLE-STRUCK SMALL A;Ll;0;L;<font> 0061;;;;N;;;;;
+
+-- title case letters
+Dž LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON;Lt;0;L;<compat> 0044 017E;;;;N;LATIN LETTER CAPITAL D SMALL Z HACEK;;01C4;01C6;01C5
+Lj LATIN CAPITAL LETTER L WITH SMALL LETTER J;Lt;0;L;<compat> 004C 006A;;;;N;LATIN LETTER CAPITAL L SMALL J;;01C7;01C9;01C8
+
+-- small caps
+ᴀ LATIN LETTER SMALL CAPITAL A;Ll;0;L;;;;;N;;;;;
+ᴦ GREEK LETTER SMALL CAPITAL GAMMA;Ll;0;L;;;;;N;;;;;
+
+-- caseless letters
+ଅ ORIYA LETTER A;Lo;0;L;;;;;N;;;;;
+அ TAMIL LETTER A;Lo;0;L;;;;;N;;;;;
+అ TELUGU LETTER A;Lo;0;L;;;;;N;;;;;
+ಅ KANNADA LETTER A;Lo;0;L;;;;;N;;;;;
+അ MALAYALAM LETTER A;Lo;0;L;;;;;N;;;;;
+අ SINHALA LETTER AYANNA;Lo;0;L;;;;;N;;;;;
+ก THAI CHARACTER KO KAI;Lo;0;L;;;;;N;THAI LETTER KO KAI;;;;
+ກ LAO LETTER KO;Lo;0;L;;;;;N;;;;;
+ཀ TIBETAN LETTER KA;Lo;0;L;;;;;N;;;;;
+က MYANMAR LETTER KA;Lo;0;L;;;;;N;;;;;
+ᄀ HANGUL CHOSEONG KIYEOK;Lo;0;L;;;;;N;;g *;;;
+ሀ ETHIOPIC SYLLABLE HA;Lo;0;L;;;;;N;;;;;
+Ꭰ CHEROKEE LETTER A;Lo;0;L;;;;;N;;;;;
+ᐁ CANADIAN SYLLABICS E;Lo;0;L;;;;;N;;;;;
+ᚁ OGHAM LETTER BEITH;Lo;0;L;;;;;N;;;;;
+ᚠ RUNIC LETTER FEHU FEOH FE F;Lo;0;L;;;;;N;;;;;
+ᜀ TAGALOG LETTER A;Lo;0;L;;;;;N;;;;;
+ᜠ HANUNOO LETTER A;Lo;0;L;;;;;N;;;;;
+ᝀ BUHID LETTER A;Lo;0;L;;;;;N;;;;;
+ᝠ TAGBANWA LETTER A;Lo;0;L;;;;;N;;;;;
+ក KHMER LETTER KA;Lo;0;L;;;;;N;;;;;
+ᠠ MONGOLIAN LETTER A;Lo;0;L;;;;;N;;;;;
+ᤁ LIMBU LETTER KA;Lo;0;L;;;;;N;;;;;
+ᥐ TAI LE LETTER KA;Lo;0;L;;;;;N;;;;;
+ぁ HIRAGANA LETTER SMALL A;Lo;0;L;;;;;N;;;;;
+ア KATAKANA LETTER A;Lo;0;L;;;;;N;;;;;
+ㄅ BOPOMOFO LETTER B;Lo;0;L;;;;;N;;;;;
+ㄱ HANGUL LETTER KIYEOK;Lo;0;L;<compat> 1100;;;;N;HANGUL LETTER GIYEOG;;;;
+ㆠ BOPOMOFO LETTER BU;Lo;0;L;;;;;N;;;;;
+ꀀ YI SYLLABLE IT;Lo;0;L;;;;;N;;;;;
+
+-- spaces
+  NO-BREAK SPACE;Zs;0;CS;<noBreak> 0020;;;;N;NON-BREAKING SPACE;;;;
+  EN QUAD;Zs;0;WS;2002;;;;N;;;;;
+  EN SPACE;Zs;0;WS;<compat> 0020;;;;N;;;;;
+  THIN SPACE;Zs;0;WS;<compat> 0020;;;;N;;;;;
+​ ZERO WIDTH SPACE;Zs;0;BN;;;;;N;;;;;
+
+-- some symbols we might find useful in Haskell
+← LEFTWARDS ARROW;Sm;0;ON;;;;;N;LEFT ARROW;;;;
+→ RIGHTWARDS ARROW;Sm;0;ON;;;;;N;RIGHT ARROW;;;;
+‖ DOUBLE VERTICAL LINE;Po;0;ON;;;;;N;DOUBLE VERTICAL BAR;;;;
+∀ FOR ALL;Sm;0;ON;;;;;N;;;;;
+∁ COMPLEMENT;Sm;0;ON;;;;;Y;;;;;
+∃ THERE EXISTS;Sm;0;ON;;;;;Y;;;;;
+∄ THERE DOES NOT EXIST;Sm;0;ON;2203 0338;;;;Y;;;;;
+∅ EMPTY SET;Sm;0;ON;;;;;N;;;;;
+∆ INCREMENT;Sm;0;ON;;;;;N;;;;;
+∇ NABLA;Sm;0;ON;;;;;N;;;;;
+∈ ELEMENT OF;Sm;0;ON;;;;;Y;;;;;
+∉ NOT AN ELEMENT OF;Sm;0;ON;2208 0338;;;;Y;;;;;
+∏ N-ARY PRODUCT;Sm;0;ON;;;;;N;;;;;
+∑ N-ARY SUMMATION;Sm;0;ON;;;;;Y;;;;;
+− MINUS SIGN;Sm;0;ET;;;;;N;;;;;
+∓ MINUS-OR-PLUS SIGN;Sm;0;ET;;;;;N;;;;;
+∕ DIVISION SLASH;Sm;0;ON;;;;;Y;;;;;
+∘ RING OPERATOR;Sm;0;ON;;;;;N;;;;;
+∙ BULLET OPERATOR;Sm;0;ON;;;;;N;;;;;
+√ SQUARE ROOT;Sm;0;ON;;;;;Y;;;;;
+∧ LOGICAL AND;Sm;0;ON;;;;;N;;;;;
+∨ LOGICAL OR;Sm;0;ON;;;;;N;;;;;
+∩ INTERSECTION;Sm;0;ON;;;;;N;;;;;
+∪ UNION;Sm;0;ON;;;;;N;;;;;
+≃ ASYMPTOTICALLY EQUAL TO;Sm;0;ON;;;;;Y;;;;;
+≈ ALMOST EQUAL TO;Sm;0;ON;;;;;Y;;;;;
+≠ NOT EQUAL TO;Sm;0;ON;003D 0338;;;;Y;;;;;
+≙ ESTIMATES;Sm;0;ON;;;;;N;;;;;
+≤ LESS-THAN OR EQUAL TO;Sm;0;ON;;;;;Y;LESS THAN OR EQUAL TO;;;;
+≥ GREATER-THAN OR EQUAL TO;Sm;0;ON;;;;;Y;GREATER THAN OR EQUAL TO;;;;
+≪ MUCH LESS-THAN;Sm;0;ON;;;;;Y;MUCH LESS THAN;;;;
+≫ MUCH GREATER-THAN;Sm;0;ON;;;;;Y;MUCH GREATER THAN;;;;
+⊂ SUBSET OF;Sm;0;ON;;;;;Y;;;;;
+⊃ SUPERSET OF;Sm;0;ON;;;;;Y;;;;;
+⊄ NOT A SUBSET OF;Sm;0;ON;2282 0338;;;;Y;;;;;
+⊅ NOT A SUPERSET OF;Sm;0;ON;2283 0338;;;;Y;;;;;
+⊆ SUBSET OF OR EQUAL TO;Sm;0;ON;;;;;Y;;;;;
+⊇ SUPERSET OF OR EQUAL TO;Sm;0;ON;;;;;Y;;;;;
+⊕ CIRCLED PLUS;Sm;0;ON;;;;;N;;;;;
+⊖ CIRCLED MINUS;Sm;0;ON;;;;;N;;;;;
+⊗ CIRCLED TIMES;Sm;0;ON;;;;;N;;;;;
+⊘ CIRCLED DIVISION SLASH;Sm;0;ON;;;;;Y;;;;;
+⊙ CIRCLED DOT OPERATOR;Sm;0;ON;;;;;N;;;;;
+⊢ RIGHT TACK;Sm;0;ON;;;;;Y;;;;;
+⊣ LEFT TACK;Sm;0;ON;;;;;Y;;;;;
+⊤ DOWN TACK;Sm;0;ON;;;;;N;;;;;
+⊥ UP TACK;Sm;0;ON;;;;;N;;;;;
+⊦ ASSERTION;Sm;0;ON;;;;;Y;;;;;
+⊧ MODELS;Sm;0;ON;;;;;Y;;;;;
+⊨ TRUE;Sm;0;ON;;;;;Y;;;;;
+⋂ N-ARY INTERSECTION;Sm;0;ON;;;;;N;;;;;
+⋃ N-ARY UNION;Sm;0;ON;;;;;N;;;;;
+⋅ DOT OPERATOR;Sm;0;ON;;;;;N;;;;;
+⋯ MIDLINE HORIZONTAL ELLIPSIS;Sm;0;ON;;;;;N;;;;;
+〈 LEFT-POINTING ANGLE BRACKET;Ps;0;ON;3008;;;;Y;BRA;;;;
+〉 RIGHT-POINTING ANGLE BRACKET;Pe;0;ON;3009;;;;Y;KET;;;;
+☹ WHITE FROWNING FACE;So;0;ON;;;;;N;;;;;
+☺ WHITE SMILING FACE;So;0;ON;;;;;N;;;;;
+⧺ DOUBLE PLUS;Sm;0;ON;;;;;N;;;;;
+
+-- other random symbols
+☣ BIOHAZARD SIGN;So;0;ON;;;;;N;;;;;
+𝄬 MUSICAL SYMBOL FLAT UP;So;0;L;;;;;N;;;;;
+𝌋 TETRAGRAM FOR CONTRARIETY;So;0;ON;;;;;N;;;;;
+
+-- braille
+⡍ BRAILLE PATTERN DOTS-1347;So;0;ON;;;;;N;;;;;
+⣿ BRAILLE PATTERN DOTS-12345678;So;0;ON;;;;;N;;;;;
+
+-- numbers
+Ⅰ ROMAN NUMERAL ONE;Nl;0;L;<compat> 0049;;;1;N;;;;2170;
+Ⅼ ROMAN NUMERAL FIFTY;Nl;0;L;<compat> 004C;;;50;N;;;;217C;
+① CIRCLED DIGIT ONE;No;0;EN;<circle> 0031;;1;1;N;;;;;
+⑴ PARENTHESIZED DIGIT ONE;No;0;EN;<compat> 0028 0031 0029;;1;1;N;;;;;
+⒈ DIGIT ONE FULL STOP;No;0;EN;<compat> 0031 002E;;1;1;N;DIGIT ONE PERIOD;;;;
+-}
+
+module Main where
+
+-- Test upper-case recognition:
+data T
+ = À -- latin
+ | Α -- greek
+ | Ⴀ -- georgian
+ | Ϣ -- coptic
+ | А -- cyrillic
+ | Ա -- armenian
+ | 𝐴 -- maths italic
+ | 𝔸 -- maths double-struck
+ | Dž -- title case latin
+
+-- Test lower-case recognition:
+à α ϣ а ա 𝑎 𝕒 ᴀ ᴦ = undefined
+
+-- Caseless characters in a string:
+string = "ଅஅఅಅഅඅกກཀကᄀሀᎠᐁᚁᚠᜀᜠᝀᝠកᠠᤁᥐぁアㄅㄱㆠ" -- 29 chars; main prints this length, and utf8_024.stdout expects 29
+
+-- composition using a ring, greek type variables, and right arrows
+(∘) :: ∀ α β γ . (β → γ) → (α → β) → (α → γ)
+(f ∘ g) x = f (g x)
+
+main = print ∘ length $ string -- prints the character count of 'string'; expected output is 29 (utf8_024.stdout)
diff --git a/testsuite/tests/parser/unicode/utf8_024.stdout b/testsuite/tests/parser/unicode/utf8_024.stdout
new file mode 100644
index 0000000000..f04c001f3f
--- /dev/null
+++ b/testsuite/tests/parser/unicode/utf8_024.stdout
@@ -0,0 +1 @@
+29