summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- lib/elixir/src/elixir_tokenizer.erl 13
-rw-r--r-- lib/elixir/test/elixir/kernel/errors_test.exs 7
-rw-r--r-- lib/elixir/unicode/tokenizer.ex 4
3 files changed, 23 insertions, 1 deletions
diff --git a/lib/elixir/src/elixir_tokenizer.erl b/lib/elixir/src/elixir_tokenizer.erl
index c5b810625..aef779e6f 100644
--- a/lib/elixir/src/elixir_tokenizer.erl
+++ b/lib/elixir/src/elixir_tokenizer.erl
@@ -861,10 +861,23 @@ tokenize_identifier(String, Line, Scope) ->
{error, _Reason} = Error ->
Error
end;
+ {error, {not_nfc, Wrong}} ->
+ Right = unicode:characters_to_nfc_list(Wrong),
+ RightCodepoints = list_to_codepoint_hex(Right),
+ WrongCodepoints = list_to_codepoint_hex(Wrong),
+ Message = io_lib:format("Elixir expects unquoted Unicode atoms and variables to be in NFC form.\n"
+ "Got: \"~ts\" (codepoints~ts)\n"
+ "Expected: \"~ts\" (codepoints~ts)\n
+ Syntax error before: ",
+ [Wrong, WrongCodepoints, Right, RightCodepoints]),
+ {error, {Line, Message, Wrong}};
{error, empty} ->
empty
end.
+%% Renders a list of codepoints as hex for use in error messages: each
+%% codepoint becomes a space followed by its uppercase hex digits,
+%% zero-padded to at least four digits (e.g. [16#66, 16#301] renders as
+%% " 0066 0301"). Returns chardata suitable for the ~ts format directive.
+list_to_codepoint_hex(List) ->
+  [codepoint_to_hex(Codepoint) || Codepoint <- List].
+
+%% Formats a single codepoint. A fixed-width "~4.16.0B" directive prints
+%% "****" for codepoints above 16#FFFF because the value overflows the
+%% field width, so pad manually and let longer values keep all digits.
+codepoint_to_hex(Codepoint) ->
+  Hex = integer_to_list(Codepoint, 16),
+  [$\s, lists:duplicate(max(0, 4 - length(Hex)), $0), Hex].
+
tokenize_alias(Rest, Line, Column, Atom, Length, Ascii, Special, Scope, Tokens) ->
if
not Ascii ->
diff --git a/lib/elixir/test/elixir/kernel/errors_test.exs b/lib/elixir/test/elixir/kernel/errors_test.exs
index ecfffa49f..ecf25a766 100644
--- a/lib/elixir/test/elixir/kernel/errors_test.exs
+++ b/lib/elixir/test/elixir/kernel/errors_test.exs
@@ -57,6 +57,13 @@ defmodule Kernel.ErrorsTest do
if :erlang.system_info(:otp_release) >= '20' do
message = "invalid character \"ó\" (codepoint U+00F3) in alias (only ascii characters are allowed): Foó"
assert_compile_fail SyntaxError, message, 'Foó'
+
+ message = """
+ Elixir expects unquoted Unicode atoms and variables to be in NFC form.
+ Got: "foó" (codepoints 0066 006F 006F 0301)
+ Expected: "foó" (codepoints 0066 006F 00F3)
+ """
+ assert_compile_fail SyntaxError, message, :unicode.characters_to_nfd_list("foó")
end
end
diff --git a/lib/elixir/unicode/tokenizer.ex b/lib/elixir/unicode/tokenizer.ex
index a3cf0bd38..9ffceb13e 100644
--- a/lib/elixir/unicode/tokenizer.ex
+++ b/lib/elixir/unicode/tokenizer.ex
@@ -141,8 +141,10 @@ defmodule String.Tokenizer do
validate(continue(tail, [head], 1, true, []), :alias)
ascii_start?(head) ->
validate(continue(tail, [head], 1, true, []), :identifier)
- unicode_start?(head) or unicode_upper?(head) ->
+ unicode_upper?(head) ->
validate(continue(tail, [head], 1, false, []), :atom)
+ unicode_start?(head) ->
+ validate(continue(tail, [head], 1, false, []), :identifier)
true ->
{:error, :empty}
end