diff options
-rw-r--r-- | lib/elixir/src/elixir_tokenizer.erl | 13 | ||||
-rw-r--r-- | lib/elixir/test/elixir/kernel/errors_test.exs | 7 | ||||
-rw-r--r-- | lib/elixir/unicode/tokenizer.ex | 4 |
3 files changed, 23 insertions, 1 deletion
diff --git a/lib/elixir/src/elixir_tokenizer.erl b/lib/elixir/src/elixir_tokenizer.erl index c5b810625..aef779e6f 100644 --- a/lib/elixir/src/elixir_tokenizer.erl +++ b/lib/elixir/src/elixir_tokenizer.erl @@ -861,10 +861,23 @@ tokenize_identifier(String, Line, Scope) -> {error, _Reason} = Error -> Error end; + {error, {not_nfc, Wrong}} -> + Right = unicode:characters_to_nfc_list(Wrong), + RightCodepoints = list_to_codepoint_hex(Right), + WrongCodepoints = list_to_codepoint_hex(Wrong), + Message = io_lib:format("Elixir expects unquoted Unicode atoms and variables to be in NFC form.\n" + "Got: \"~ts\" (codepoints~ts)\n" + "Expected: \"~ts\" (codepoints~ts)\n + Syntax error before: ", + [Wrong, WrongCodepoints, Right, RightCodepoints]), + {error, {Line, Message, Wrong}}; {error, empty} -> empty end. +list_to_codepoint_hex(List) -> + [io_lib:format(" ~4.16.0B", [Codepoint]) || Codepoint <- List]. + tokenize_alias(Rest, Line, Column, Atom, Length, Ascii, Special, Scope, Tokens) -> if not Ascii -> diff --git a/lib/elixir/test/elixir/kernel/errors_test.exs b/lib/elixir/test/elixir/kernel/errors_test.exs index ecfffa49f..ecf25a766 100644 --- a/lib/elixir/test/elixir/kernel/errors_test.exs +++ b/lib/elixir/test/elixir/kernel/errors_test.exs @@ -57,6 +57,13 @@ defmodule Kernel.ErrorsTest do if :erlang.system_info(:otp_release) >= '20' do message = "invalid character \"ó\" (codepoint U+00F3) in alias (only ascii characters are allowed): Foó" assert_compile_fail SyntaxError, message, 'Foó' + + message = """ + Elixir expects unquoted Unicode atoms and variables to be in NFC form. 
+ Got: "foó" (codepoints 0066 006F 006F 0301) + Expected: "foó" (codepoints 0066 006F 00F3) + """ + assert_compile_fail SyntaxError, message, :unicode.characters_to_nfd_list("foó") end end diff --git a/lib/elixir/unicode/tokenizer.ex b/lib/elixir/unicode/tokenizer.ex index a3cf0bd38..9ffceb13e 100644 --- a/lib/elixir/unicode/tokenizer.ex +++ b/lib/elixir/unicode/tokenizer.ex @@ -141,8 +141,10 @@ defmodule String.Tokenizer do validate(continue(tail, [head], 1, true, []), :alias) ascii_start?(head) -> validate(continue(tail, [head], 1, true, []), :identifier) - unicode_start?(head) or unicode_upper?(head) -> + unicode_upper?(head) -> validate(continue(tail, [head], 1, false, []), :atom) + unicode_start?(head) -> + validate(continue(tail, [head], 1, false, []), :identifier) true -> {:error, :empty} end |