32 files changed, 288 insertions, 0 deletions
diff --git a/testsuite/tests/parser/unicode/1103.hs b/testsuite/tests/parser/unicode/1103.hs
new file mode 100644
index 0000000000..6d10064056
--- /dev/null
+++ b/testsuite/tests/parser/unicode/1103.hs
@@ -0,0 +1,13 @@
+{-# LANGUAGE UnicodeSyntax #-}
+{-
+	Three kinds of Unicode tests for our purposes.
+-}
+
+module UniTest where
+
+-- Non working Japanese Unicode test.
+
+てすと3 ∷ IO ()
+てすと3 = do
+	putStrLn $ show 人間虫 where
+	人間虫 = "humasect"
diff --git a/testsuite/tests/parser/unicode/1744.hs b/testsuite/tests/parser/unicode/1744.hs
new file mode 100644
index 0000000000..90273741da
--- /dev/null
+++ b/testsuite/tests/parser/unicode/1744.hs
@@ -0,0 +1,3 @@
+main = print hello
+-- test that layout has not been screwed up
+hello = "こんにちは 世界"
diff --git a/testsuite/tests/parser/unicode/1744.stdout b/testsuite/tests/parser/unicode/1744.stdout
new file mode 100644
index 0000000000..f127f8d21d
--- /dev/null
+++ b/testsuite/tests/parser/unicode/1744.stdout
@@ -0,0 +1 @@
+"\12371\12435\12395\12385\12399 \19990\30028"
diff --git a/testsuite/tests/parser/unicode/2302.hs b/testsuite/tests/parser/unicode/2302.hs
new file mode 100644
index 0000000000..c40c704cc3
--- /dev/null
+++ b/testsuite/tests/parser/unicode/2302.hs
@@ -0,0 +1 @@
+f = À
diff --git a/testsuite/tests/parser/unicode/2302.stderr b/testsuite/tests/parser/unicode/2302.stderr
new file mode 100644
index 0000000000..608c9ef0bd
--- /dev/null
+++ b/testsuite/tests/parser/unicode/2302.stderr
@@ -0,0 +1,2 @@
+
+2302.hs:1:5: Not in scope: data constructor `À'
diff --git a/testsuite/tests/parser/unicode/4373.hs b/testsuite/tests/parser/unicode/4373.hs
new file mode 100644
index 0000000000..a753432a41
--- /dev/null
+++ b/testsuite/tests/parser/unicode/4373.hs
@@ -0,0 +1,3 @@
+module ShouldCompile where
+
+test = let v₂ = (+) in v₂ 1 3
diff --git a/testsuite/tests/parser/unicode/Makefile b/testsuite/tests/parser/unicode/Makefile
new file mode 100644
index 0000000000..9101fbd40a
--- /dev/null
+++ b/testsuite/tests/parser/unicode/Makefile
@@ -0,0 +1,3 @@
+TOP=../../..
+include $(TOP)/mk/boilerplate.mk
+include $(TOP)/mk/test.mk
diff --git a/testsuite/tests/parser/unicode/all.T b/testsuite/tests/parser/unicode/all.T
new file mode 100644
index 0000000000..e5375a361e
--- /dev/null
+++ b/testsuite/tests/parser/unicode/all.T
@@ -0,0 +1,22 @@
+# test that we catch UTF-8 decoding errors
+
+test('utf8_002', normal, compile_fail, [''])
+test('utf8_003', normal, compile_fail, [''])
+test('utf8_004', normal, compile_fail, [''])
+test('utf8_005', normal, compile_fail, [''])
+
+test('utf8_010', normal, compile_fail, [''])
+test('utf8_011', normal, compile_fail, [''])
+
+test('utf8_020', normal, compile_fail, [''])
+test('utf8_021', normal, compile_fail, [''])
+test('utf8_022', normal, compile_fail, [''])
+
+# test that we can understand unicode characters in lexemes
+
+test('utf8_024', normal, compile_and_run, [''])
+
+test('1744', normal, compile_and_run, [''])
+test('1103', normal, compile, [''])
+test('2302', only_ways(['normal']), compile_fail, [''])
+test('4373', normal, compile, [''])
diff --git a/testsuite/tests/parser/unicode/utf8_001.hs b/testsuite/tests/parser/unicode/utf8_001.hs
new file mode 100644
index 0000000000..371e89e1fa
--- /dev/null
+++ b/testsuite/tests/parser/unicode/utf8_001.hs
@@ -0,0 +1,2 @@
+-- 0x80 is an invalid character
+bad = '�'
diff --git a/testsuite/tests/parser/unicode/utf8_001.stderr b/testsuite/tests/parser/unicode/utf8_001.stderr
new file mode 100644
index 0000000000..e69de29bb2
--- /dev/null
+++ b/testsuite/tests/parser/unicode/utf8_001.stderr
diff --git a/testsuite/tests/parser/unicode/utf8_002.hs b/testsuite/tests/parser/unicode/utf8_002.hs
new file mode 100644
index 0000000000..589da832f2
--- /dev/null
+++ b/testsuite/tests/parser/unicode/utf8_002.hs
@@ -0,0 +1,2 @@
+-- buffer ends in 0xC0
+�
+\ No newline at end of file
diff --git a/testsuite/tests/parser/unicode/utf8_002.stderr b/testsuite/tests/parser/unicode/utf8_002.stderr
new file mode 100644
index 0000000000..d8083f0c89
--- /dev/null
+++ b/testsuite/tests/parser/unicode/utf8_002.stderr
@@ -0,0 +1,2 @@
+
+utf8_002.hs:2:1: lexical error (UTF-8 decoding error)
diff --git a/testsuite/tests/parser/unicode/utf8_003.hs b/testsuite/tests/parser/unicode/utf8_003.hs
new file mode 100644
index 0000000000..bd8e2f552c
--- /dev/null
+++ b/testsuite/tests/parser/unicode/utf8_003.hs
@@ -0,0 +1,2 @@
+-- buffer ends in 0xD0
+�
+\ No newline at end of file
diff --git a/testsuite/tests/parser/unicode/utf8_003.stderr b/testsuite/tests/parser/unicode/utf8_003.stderr
new file mode 100644
index 0000000000..be433d9141
--- /dev/null
+++ b/testsuite/tests/parser/unicode/utf8_003.stderr
@@ -0,0 +1,2 @@
+
+utf8_003.hs:2:1: lexical error (UTF-8 decoding error)
diff --git a/testsuite/tests/parser/unicode/utf8_004.hs b/testsuite/tests/parser/unicode/utf8_004.hs
new file mode 100644
index 0000000000..6a1a839246
--- /dev/null
+++ b/testsuite/tests/parser/unicode/utf8_004.hs
@@ -0,0 +1,2 @@
+-- buffer ends in 0xE0
+�
+\ No newline at end of file
diff --git a/testsuite/tests/parser/unicode/utf8_004.stderr b/testsuite/tests/parser/unicode/utf8_004.stderr
new file mode 100644
index 0000000000..aff8256549
--- /dev/null
+++ b/testsuite/tests/parser/unicode/utf8_004.stderr
@@ -0,0 +1,2 @@
+
+utf8_004.hs:2:1: lexical error (UTF-8 decoding error)
diff --git a/testsuite/tests/parser/unicode/utf8_005.hs b/testsuite/tests/parser/unicode/utf8_005.hs
new file mode 100644
index 0000000000..e88fec5a4f
--- /dev/null
+++ b/testsuite/tests/parser/unicode/utf8_005.hs
@@ -0,0 +1,2 @@
+-- buffer ends in 0xF0
+�
+\ No newline at end of file
diff --git a/testsuite/tests/parser/unicode/utf8_005.stderr b/testsuite/tests/parser/unicode/utf8_005.stderr
new file mode 100644
index 0000000000..3d551bae44
--- /dev/null
+++ b/testsuite/tests/parser/unicode/utf8_005.stderr
@@ -0,0 +1,2 @@
+
+utf8_005.hs:2:1: lexical error (UTF-8 decoding error)
diff --git a/testsuite/tests/parser/unicode/utf8_010.hs b/testsuite/tests/parser/unicode/utf8_010.hs
new file mode 100644
index 0000000000..371e89e1fa
--- /dev/null
+++ b/testsuite/tests/parser/unicode/utf8_010.hs
@@ -0,0 +1,2 @@
+-- 0x80 is an invalid character
+bad = '�'
diff --git a/testsuite/tests/parser/unicode/utf8_010.stderr b/testsuite/tests/parser/unicode/utf8_010.stderr
new file mode 100644
index 0000000000..a2bb9b52e2
--- /dev/null
+++ b/testsuite/tests/parser/unicode/utf8_010.stderr
@@ -0,0 +1,3 @@
+
+utf8_010.hs:2:8:
+    lexical error in string/character literal (UTF-8 decoding error)
diff --git a/testsuite/tests/parser/unicode/utf8_011.hs b/testsuite/tests/parser/unicode/utf8_011.hs
new file mode 100644
index 0000000000..5700e1db45
--- /dev/null
+++ b/testsuite/tests/parser/unicode/utf8_011.hs
@@ -0,0 +1,2 @@
+-- 0xbf is an invalid character
+bad = '�'
diff --git a/testsuite/tests/parser/unicode/utf8_011.stderr b/testsuite/tests/parser/unicode/utf8_011.stderr
new file mode 100644
index 0000000000..0b34980303
--- /dev/null
+++ b/testsuite/tests/parser/unicode/utf8_011.stderr
@@ -0,0 +1,3 @@
+
+utf8_011.hs:2:8:
+    lexical error in string/character literal (UTF-8 decoding error)
diff --git a/testsuite/tests/parser/unicode/utf8_020.hs b/testsuite/tests/parser/unicode/utf8_020.hs
new file mode 100644
index 0000000000..eaefe622fa
--- /dev/null
+++ b/testsuite/tests/parser/unicode/utf8_020.hs
@@ -0,0 +1,2 @@
+-- A start sequence byte (0xC0) followed by an invalid continuation: 
+bad = "�."
diff --git a/testsuite/tests/parser/unicode/utf8_020.stderr b/testsuite/tests/parser/unicode/utf8_020.stderr
new file mode 100644
index 0000000000..7254106142
--- /dev/null
+++ b/testsuite/tests/parser/unicode/utf8_020.stderr
@@ -0,0 +1,3 @@
+
+utf8_020.hs:2:8:
+    lexical error in string/character literal (UTF-8 decoding error)
diff --git a/testsuite/tests/parser/unicode/utf8_021.hs b/testsuite/tests/parser/unicode/utf8_021.hs
new file mode 100644
index 0000000000..639e0bfc63
--- /dev/null
+++ b/testsuite/tests/parser/unicode/utf8_021.hs
@@ -0,0 +1,2 @@
+-- A start sequence byte (0xE0) followed by an invalid continuation: 
+bad = "�."
diff --git a/testsuite/tests/parser/unicode/utf8_021.stderr b/testsuite/tests/parser/unicode/utf8_021.stderr
new file mode 100644
index 0000000000..2867239846
--- /dev/null
+++ b/testsuite/tests/parser/unicode/utf8_021.stderr
@@ -0,0 +1,3 @@
+
+utf8_021.hs:2:8:
+    lexical error in string/character literal (UTF-8 decoding error)
diff --git a/testsuite/tests/parser/unicode/utf8_022.hs b/testsuite/tests/parser/unicode/utf8_022.hs
new file mode 100644
index 0000000000..6484a03c40
--- /dev/null
+++ b/testsuite/tests/parser/unicode/utf8_022.hs
@@ -0,0 +1,2 @@
+-- A start sequence byte (0xE0) followed by an invalid continuation: 
+bad = "�."
diff --git a/testsuite/tests/parser/unicode/utf8_022.stderr b/testsuite/tests/parser/unicode/utf8_022.stderr
new file mode 100644
index 0000000000..3f84d06de2
--- /dev/null
+++ b/testsuite/tests/parser/unicode/utf8_022.stderr
@@ -0,0 +1,3 @@
+
+utf8_022.hs:2:8:
+    lexical error in string/character literal (UTF-8 decoding error)
diff --git a/testsuite/tests/parser/unicode/utf8_023.hs b/testsuite/tests/parser/unicode/utf8_023.hs
new file mode 100644
index 0000000000..255d48b741
--- /dev/null
+++ b/testsuite/tests/parser/unicode/utf8_023.hs
@@ -0,0 +1,2 @@
+-- some incomplete sequences concatenated
+bad = "����������"
diff --git a/testsuite/tests/parser/unicode/utf8_023.stderr b/testsuite/tests/parser/unicode/utf8_023.stderr
new file mode 100644
index 0000000000..e69de29bb2
--- /dev/null
+++ b/testsuite/tests/parser/unicode/utf8_023.stderr
diff --git a/testsuite/tests/parser/unicode/utf8_024.hs b/testsuite/tests/parser/unicode/utf8_024.hs
new file mode 100644
index 0000000000..1e491f75ec
--- /dev/null
+++ b/testsuite/tests/parser/unicode/utf8_024.hs
@@ -0,0 +1,194 @@
+{-# LANGUAGE ExplicitForAll, UnicodeSyntax #-}
+{-
+Test for valid unicode identifiers
+
+*** This file is UTF-8 encoded.
+
+*** BE CAREFUL WHEN EDITING THIS FILE WITH EMACS.  Emacs' UTF-8 engine
+    has several times got the encoding wrong for me and inserted bogus
+    bytes, especially in the 4-byte characters.  Edit the file literally
+    (M-x find-file-literally).  By all means view it in Emacs' UTF-8
+    mode (C-x RET c utf-8, C-x f unicode001.hs), but don't edit and save.
+
+Here's a selection of characters I pulled from UnicodeData.txt that we
+can use to test with:
+
+-- upper/lower case letters
+À	LATIN CAPITAL LETTER A WITH GRAVE;Lu;0;L;0041 0300;;;;N;LATIN CAPITAL LETTER A GRAVE;;;00E0;
+à	LATIN SMALL LETTER A WITH GRAVE;Ll;0;L;0061 0300;;;;N;LATIN SMALL LETTER A GRAVE;;00C0;;00C0
+
+Α	GREEK CAPITAL LETTER ALPHA;Lu;0;L;;;;;N;;;;03B1;
+α	GREEK SMALL LETTER ALPHA;Ll;0;L;;;;;N;;;0391;;0391
+α	GREEK SMALL LETTER ALPHA;Ll;0;L;;;;;N;;;0391;;0391
+β	GREEK SMALL LETTER BETA;Ll;0;L;;;;;N;;;0392;;0392
+γ	GREEK SMALL LETTER GAMMA;Ll;0;L;;;;;N;;;0393;;0393
+δ	GREEK SMALL LETTER DELTA;Ll;0;L;;;;;N;;;0394;;0394
+
+Ⴀ	GEORGIAN CAPITAL LETTER AN;Lu;0;L;;;;;N;;Khutsuri;;;
+ა	GEORGIAN LETTER AN;Lo;0;L;;;;;N;GEORGIAN SMALL LETTER AN;;;;
+
+Ϣ	COPTIC CAPITAL LETTER SHEI;Lu;0;L;;;;;N;GREEK CAPITAL LETTER SHEI;;;03E3;
+ϣ	COPTIC SMALL LETTER SHEI;Ll;0;L;;;;;N;GREEK SMALL LETTER SHEI;;03E2;;03E2
+
+А	CYRILLIC CAPITAL LETTER A;Lu;0;L;;;;;N;;;;0430;
+а	CYRILLIC SMALL LETTER A;Ll;0;L;;;;;N;;;0410;;0410
+
+Ա	ARMENIAN CAPITAL LETTER AYB;Lu;0;L;;;;;N;;;;0561;
+ա	ARMENIAN SMALL LETTER AYB;Ll;0;L;;;;;N;;;0531;;0531
+
+𝐴	MATHEMATICAL ITALIC CAPITAL A;Lu;0;L;<font> 0041;;;;N;;;;;
+𝑎	MATHEMATICAL ITALIC SMALL A;Ll;0;L;<font> 0061;;;;N;;;;;
+
+𝔸	MATHEMATICAL DOUBLE-STRUCK CAPITAL A;Lu;0;L;<font> 0041;;;;N;;;;;
+𝕒	MATHEMATICAL DOUBLE-STRUCK SMALL A;Ll;0;L;<font> 0061;;;;N;;;;;
+
+-- title case letters
+ǅ	LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON;Lt;0;L;<compat> 0044 017E;;;;N;LATIN LETTER CAPITAL D SMALL Z HACEK;;01C4;01C6;01C5
+ǈ	LATIN CAPITAL LETTER L WITH SMALL LETTER J;Lt;0;L;<compat> 004C 006A;;;;N;LATIN LETTER CAPITAL L SMALL J;;01C7;01C9;01C8
+
+-- small caps
+ᴀ	LATIN LETTER SMALL CAPITAL A;Ll;0;L;;;;;N;;;;;
+ᴦ	GREEK LETTER SMALL CAPITAL GAMMA;Ll;0;L;;;;;N;;;;;
+
+-- caseless letters
+ଅ	;ORIYA LETTER A;Lo;0;L;;;;;N;;;;;
+அ	TAMIL LETTER A;Lo;0;L;;;;;N;;;;;
+అ	TELUGU LETTER A;Lo;0;L;;;;;N;;;;;
+ಅ	KANNADA LETTER A;Lo;0;L;;;;;N;;;;;
+അ	MALAYALAM LETTER A;Lo;0;L;;;;;N;;;;;
+අ	SINHALA LETTER AYANNA;Lo;0;L;;;;;N;;;;;
+ก	THAI CHARACTER KO KAI;Lo;0;L;;;;;N;THAI LETTER KO KAI;;;;
+ກ	LAO LETTER KO;Lo;0;L;;;;;N;;;;;
+ཀ	TIBETAN LETTER KA;Lo;0;L;;;;;N;;;;;
+က	MYANMAR LETTER KA;Lo;0;L;;;;;N;;;;;
+ᄀ	HANGUL CHOSEONG KIYEOK;Lo;0;L;;;;;N;;g *;;;
+ሀ	ETHIOPIC SYLLABLE HA;Lo;0;L;;;;;N;;;;;
+Ꭰ	CHEROKEE LETTER A;Lo;0;L;;;;;N;;;;;
+ᐁ	CANADIAN SYLLABICS E;Lo;0;L;;;;;N;;;;;
+ᚁ	OGHAM LETTER BEITH;Lo;0;L;;;;;N;;;;;
+ᚠ	RUNIC LETTER FEHU FEOH FE F;Lo;0;L;;;;;N;;;;;
+ᜀ	TAGALOG LETTER A;Lo;0;L;;;;;N;;;;;
+ᜠ	HANUNOO LETTER A;Lo;0;L;;;;;N;;;;;
+ᝀ	BUHID LETTER A;Lo;0;L;;;;;N;;;;;
+ᝠ	TAGBANWA LETTER A;Lo;0;L;;;;;N;;;;;
+ក	KHMER LETTER KA;Lo;0;L;;;;;N;;;;;
+ᠠ	MONGOLIAN LETTER A;Lo;0;L;;;;;N;;;;;
+ᤁ	LIMBU LETTER KA;Lo;0;L;;;;;N;;;;;
+ᥐ	TAI LE LETTER KA;Lo;0;L;;;;;N;;;;;
+ぁ	HIRAGANA LETTER SMALL A;Lo;0;L;;;;;N;;;;;
+ア	KATAKANA LETTER A;Lo;0;L;;;;;N;;;;;
+ㄅ	BOPOMOFO LETTER B;Lo;0;L;;;;;N;;;;;
+ㄱ	HANGUL LETTER KIYEOK;Lo;0;L;<compat> 1100;;;;N;HANGUL LETTER GIYEOG;;;;
+ㆠ	BOPOMOFO LETTER BU;Lo;0;L;;;;;N;;;;;
+ꀀ	YI SYLLABLE IT;Lo;0;L;;;;;N;;;;;
+
+-- spaces
+ 	NO-BREAK SPACE;Zs;0;CS;<noBreak> 0020;;;;N;NON-BREAKING SPACE;;;;
+ 	EN QUAD;Zs;0;WS;2002;;;;N;;;;;
+ 	EN SPACE;Zs;0;WS;<compat> 0020;;;;N;;;;;
+ 	THIN SPACE;Zs;0;WS;<compat> 0020;;;;N;;;;;
+	ZERO WIDTH SPACE;Zs;0;BN;;;;;N;;;;;
+
+-- some symbols we might find useful in Haskell
+←	LEFTWARDS ARROW;Sm;0;ON;;;;;N;LEFT ARROW;;;;
+→	RIGHTWARDS ARROW;Sm;0;ON;;;;;N;RIGHT ARROW;;;;
+‖	DOUBLE VERTICAL LINE;Po;0;ON;;;;;N;DOUBLE VERTICAL BAR;;;;
+∀	FOR ALL;Sm;0;ON;;;;;N;;;;;
+∁	COMPLEMENT;Sm;0;ON;;;;;Y;;;;;
+∃	THERE EXISTS;Sm;0;ON;;;;;Y;;;;;
+∄	THERE DOES NOT EXIST;Sm;0;ON;2203 0338;;;;Y;;;;;
+∅	EMPTY SET;Sm;0;ON;;;;;N;;;;;
+∆	INCREMENT;Sm;0;ON;;;;;N;;;;;
+∇	NABLA;Sm;0;ON;;;;;N;;;;;
+∈	ELEMENT OF;Sm;0;ON;;;;;Y;;;;;
+∉	NOT AN ELEMENT OF;Sm;0;ON;2208 0338;;;;Y;;;;;
+∏	N-ARY PRODUCT;Sm;0;ON;;;;;N;;;;;
+∑	N-ARY SUMMATION;Sm;0;ON;;;;;Y;;;;;
+−	MINUS SIGN;Sm;0;ET;;;;;N;;;;;
+∓	MINUS-OR-PLUS SIGN;Sm;0;ET;;;;;N;;;;;
+∕	DIVISION SLASH;Sm;0;ON;;;;;Y;;;;;
+∘	RING OPERATOR;Sm;0;ON;;;;;N;;;;;
+∙	BULLET OPERATOR;Sm;0;ON;;;;;N;;;;;
+√	SQUARE ROOT;Sm;0;ON;;;;;Y;;;;;
+∧	LOGICAL AND;Sm;0;ON;;;;;N;;;;;
+∨	LOGICAL OR;Sm;0;ON;;;;;N;;;;;
+∩	INTERSECTION;Sm;0;ON;;;;;N;;;;;
+∪	UNION;Sm;0;ON;;;;;N;;;;;
+≃	ASYMPTOTICALLY EQUAL TO;Sm;0;ON;;;;;Y;;;;;
+≈	ALMOST EQUAL TO;Sm;0;ON;;;;;Y;;;;;
+≠	NOT EQUAL TO;Sm;0;ON;003D 0338;;;;Y;;;;;
+≙	ESTIMATES;Sm;0;ON;;;;;N;;;;;
+≤	LESS-THAN OR EQUAL TO;Sm;0;ON;;;;;Y;LESS THAN OR EQUAL TO;;;;
+≥	GREATER-THAN OR EQUAL TO;Sm;0;ON;;;;;Y;GREATER THAN OR EQUAL TO;;;;
+≪	MUCH LESS-THAN;Sm;0;ON;;;;;Y;MUCH LESS THAN;;;;
+≫	MUCH GREATER-THAN;Sm;0;ON;;;;;Y;MUCH GREATER THAN;;;;
+⊂	SUBSET OF;Sm;0;ON;;;;;Y;;;;;
+⊃	SUPERSET OF;Sm;0;ON;;;;;Y;;;;;
+⊄	NOT A SUBSET OF;Sm;0;ON;2282 0338;;;;Y;;;;;
+⊅	NOT A SUPERSET OF;Sm;0;ON;2283 0338;;;;Y;;;;;
+⊆	SUBSET OF OR EQUAL TO;Sm;0;ON;;;;;Y;;;;;
+⊇	SUPERSET OF OR EQUAL TO;Sm;0;ON;;;;;Y;;;;;
+⊕	CIRCLED PLUS;Sm;0;ON;;;;;N;;;;;
+⊖	CIRCLED MINUS;Sm;0;ON;;;;;N;;;;;
+⊗	CIRCLED TIMES;Sm;0;ON;;;;;N;;;;;
+⊘	CIRCLED DIVISION SLASH;Sm;0;ON;;;;;Y;;;;;
+⊙	CIRCLED DOT OPERATOR;Sm;0;ON;;;;;N;;;;;
+⊢	RIGHT TACK;Sm;0;ON;;;;;Y;;;;;
+⊣	LEFT TACK;Sm;0;ON;;;;;Y;;;;;
+⊤	DOWN TACK;Sm;0;ON;;;;;N;;;;;
+⊥	UP TACK;Sm;0;ON;;;;;N;;;;;
+⊦	ASSERTION;Sm;0;ON;;;;;Y;;;;;
+⊧	MODELS;Sm;0;ON;;;;;Y;;;;;
+⊨	TRUE;Sm;0;ON;;;;;Y;;;;;
+⋂	N-ARY INTERSECTION;Sm;0;ON;;;;;N;;;;;
+⋃	N-ARY UNION;Sm;0;ON;;;;;N;;;;;
+⋅	DOT OPERATOR;Sm;0;ON;;;;;N;;;;;
+⋯	MIDLINE HORIZONTAL ELLIPSIS;Sm;0;ON;;;;;N;;;;;
+〈	LEFT-POINTING ANGLE BRACKET;Ps;0;ON;3008;;;;Y;BRA;;;;
+〉	RIGHT-POINTING ANGLE BRACKET;Pe;0;ON;3009;;;;Y;KET;;;;
+☹	WHITE FROWNING FACE;So;0;ON;;;;;N;;;;;
+☺	WHITE SMILING FACE;So;0;ON;;;;;N;;;;;
+⧺	DOUBLE PLUS;Sm;0;ON;;;;;N;;;;;
+
+-- other random symbols
+☣	BIOHAZARD SIGN;So;0;ON;;;;;N;;;;;
+𝄬	MUSICAL SYMBOL FLAT UP;So;0;L;;;;;N;;;;;
+𝌋	TETRAGRAM FOR CONTRARIETY;So;0;ON;;;;;N;;;;;
+
+-- braille
+⡍	;BRAILLE PATTERN DOTS-1347;So;0;ON;;;;;N;;;;;
+⣿	;BRAILLE PATTERN DOTS-12345678;So;0;ON;;;;;N;;;;;
+
+-- numbers
+Ⅰ	;ROMAN NUMERAL ONE;Nl;0;L;<compat> 0049;;;1;N;;;;2170;
+Ⅼ	;ROMAN NUMERAL FIFTY;Nl;0;L;<compat> 004C;;;50;N;;;;217C;
+①	;CIRCLED DIGIT ONE;No;0;EN;<circle> 0031;;1;1;N;;;;;
+⑴	;PARENTHESIZED DIGIT ONE;No;0;EN;<compat> 0028 0031 0029;;1;1;N;;;;;
+⒈	;DIGIT ONE FULL STOP;No;0;EN;<compat> 0031 002E;;1;1;N;DIGIT ONE PERIOD;;;;
+-}
+
+module Main where
+
+-- Test upper-case recognition:
+data T 
+  = À		-- latin
+  | Α		-- greek
+  | Ⴀ		-- georgian
+  | Ϣ		-- coptic
+  | А		-- cyrillic
+  | Ա		-- armenian
+  | 𝐴	-- maths italic
+  | 𝔸	-- maths double-struck
+  | ǅ		-- title case latin
+
+-- Test lower-case recognition:
+à α ϣ а ա 𝑎 𝕒 ᴀ ᴦ = undefined
+
+-- Caseless characters in a string:
+string = "ଅஅఅಅഅඅกກཀကᄀሀᎠᐁᚁᚠᜀᜠᝀᝠកᠠᤁᥐぁアㄅㄱㆠ" -- 29 chars
+
+-- composition using a ring, greek type variables, and right arrows
+(∘) :: ∀ α β γ . (β → γ) → (α → β) → (α → γ)
+(f ∘ g) x = f (g x)
+
+main = print ∘ length $ string
diff --git a/testsuite/tests/parser/unicode/utf8_024.stdout b/testsuite/tests/parser/unicode/utf8_024.stdout
new file mode 100644
index 0000000000..f04c001f3f
--- /dev/null
+++ b/testsuite/tests/parser/unicode/utf8_024.stdout
@@ -0,0 +1 @@
+29