diff options
-rw-r--r-- compiler/basicTypes/Lexeme.hs | 5
-rw-r--r-- compiler/parser/Lexer.x | 12
-rw-r--r-- testsuite/tests/parser/unicode/T7650.hs | 11
-rw-r--r-- testsuite/tests/parser/unicode/T7650.stdout | 1
-rw-r--r-- testsuite/tests/parser/unicode/all.T | 1
5 files changed, 22 insertions, 8 deletions
diff --git a/compiler/basicTypes/Lexeme.hs b/compiler/basicTypes/Lexeme.hs
index 9e75376dae..22515c172c 100644
--- a/compiler/basicTypes/Lexeme.hs
+++ b/compiler/basicTypes/Lexeme.hs
@@ -194,9 +194,10 @@ okIdChar c = case generalCategory c of
  LowercaseLetter -> True
  TitlecaseLetter -> True
  ModifierLetter -> True -- See #10196
- OtherLetter -> True
+ OtherLetter -> True -- See #1103
+ NonSpacingMark -> True -- See #7650
  DecimalNumber -> True
- OtherNumber -> True
+ OtherNumber -> True -- See #4373
  _ -> c == '\'' || c == '_'
 
 -- | Is this character acceptable in a symbol (after the first char)?
diff --git a/compiler/parser/Lexer.x b/compiler/parser/Lexer.x
index 5f3bdee5fa..3f959f2a03 100644
--- a/compiler/parser/Lexer.x
+++ b/compiler/parser/Lexer.x
@@ -155,8 +155,8 @@ $binit = 0-1
 $octit = 0-7
 $hexit = [$decdigit A-F a-f]
 
-$modifier = \x07 -- Trick Alex into handling Unicode. See alexGetByte.
-$idchar = [$small $large $digit $modifier \']
+$uniidchar = \x07 -- Trick Alex into handling Unicode. See alexGetByte.
+$idchar = [$small $large $digit $uniidchar \']
 
 $pragmachar = [$small $large $digit]
 
@@ -1874,10 +1874,10 @@ alexGetByte (AI loc s)
  symbol = '\x04'
  space = '\x05'
  other_graphic = '\x06'
- modifier = '\x07'
+ uniidchar = '\x07'
 
  adj_c
- | c <= '\x06' = non_graphic
+ | c <= '\x07' = non_graphic
  | c <= '\x7f' = c
  -- Alex doesn't handle Unicode, so when Unicode
  -- character is encountered we output these values
@@ -1891,9 +1891,9 @@ alexGetByte (AI loc s)
  UppercaseLetter -> upper
  LowercaseLetter -> lower
  TitlecaseLetter -> upper
- ModifierLetter -> modifier -- see #10196
+ ModifierLetter -> uniidchar -- see #10196
  OtherLetter -> lower -- see #1103
- NonSpacingMark -> other_graphic
+ NonSpacingMark -> uniidchar -- see #7650
  SpacingCombiningMark -> other_graphic
  EnclosingMark -> other_graphic
  DecimalNumber -> digit
diff --git a/testsuite/tests/parser/unicode/T7650.hs b/testsuite/tests/parser/unicode/T7650.hs
new file mode 100644
index 0000000000..c474bc0645
--- /dev/null
+++ b/testsuite/tests/parser/unicode/T7650.hs
@@ -0,0 +1,11 @@
+main = print spın̈alTap
+ where spın̈alTap = 11
+
+-- n̈ is a combining character sequence. We now allow it to be used in
+-- identifiers (#7650).
+--
+-- > map generalCategory "n̈"
+-- [LowercaseLetter,NonSpacingMark]
+--
+-- > map show "n̈"
+-- ["'n'","'\776'"]
diff --git a/testsuite/tests/parser/unicode/T7650.stdout b/testsuite/tests/parser/unicode/T7650.stdout
new file mode 100644
index 0000000000..b4de394767
--- /dev/null
+++ b/testsuite/tests/parser/unicode/T7650.stdout
@@ -0,0 +1 @@
+11
diff --git a/testsuite/tests/parser/unicode/all.T b/testsuite/tests/parser/unicode/all.T
index 6972a0d602..36554cc143 100644
--- a/testsuite/tests/parser/unicode/all.T
+++ b/testsuite/tests/parser/unicode/all.T
@@ -25,3 +25,4 @@ test('T7671', normal, compile, [''])
 # TODO: This test ought to be run in a non-UTF8 locale, but this is not yet
 # supported by the test suite (see 10907)
 test('T10907', normal, compile, [''])
+test('T7650', normal, compile, [''])