diff options
author | Andrea Leopardi <an.leopardi@gmail.com> | 2017-08-22 16:26:54 +0200 |
---|---|---|
committer | Andrea Leopardi <an.leopardi@gmail.com> | 2017-08-22 16:26:54 +0200 |
commit | 9fa1d609633b5e4e8b0fc51712822c291cee55fc (patch) | |
tree | a2ffc8368d9cbd94f38773f4dcc07c884ffef6da | |
parent | dbaf38093c40d1b55ed6bdc4d7e5d0ac06e92d17 (diff) | |
download | elixir-al/meta-for-tokens.tar.gz |
Change the format of tokens [branch: al/meta-for-tokens]
Right now, tokens are "{Token, Location}" or "{Token, Location, Value}".
This commit changes "Location" from "{Line, StartColumn, EndColumn}" to
"{Line, {StartColumn, EndColumn}, Meta}" where "Meta" can be anything.
This will be used for things such as storing the format of integers.
-rw-r--r-- | lib/elixir/src/elixir_interpolation.erl | 4 | ||||
-rw-r--r-- | lib/elixir/src/elixir_parser.yrl | 2 | ||||
-rw-r--r-- | lib/elixir/src/elixir_tokenizer.erl | 179 | ||||
-rw-r--r-- | lib/elixir/test/erlang/string_test.erl | 22 | ||||
-rw-r--r-- | lib/elixir/test/erlang/tokenizer_test.erl | 261 |
5 files changed, 258 insertions, 210 deletions
diff --git a/lib/elixir/src/elixir_interpolation.erl b/lib/elixir/src/elixir_interpolation.erl index 099dabe66..a1f331235 100644 --- a/lib/elixir/src/elixir_interpolation.erl +++ b/lib/elixir/src/elixir_interpolation.erl @@ -45,7 +45,7 @@ extract(Line, Column, Scope, true, [$\\, $#, ${ | Rest], Buffer, Output, Last) - extract(Line, Column, Scope, true, [$#, ${ | Rest], Buffer, Output, Last) -> Output1 = build_string(Line, Buffer, Output), case elixir_tokenizer:tokenize(Rest, Line, Column + 2, Scope) of - {error, {{EndLine, _, EndColumn}, _, "}"}, [$} | NewRest], Tokens} -> + {error, {{EndLine, {_, EndColumn}, _}, _, "}"}, [$} | NewRest], Tokens} -> Output2 = build_interpol(Line, Column, EndColumn, Tokens, Output1), extract(EndLine, EndColumn, Scope, true, NewRest, [], Output2, Last); {error, Reason, _, _} -> @@ -211,4 +211,4 @@ build_string(_Line, Buffer, Output) -> [elixir_utils:characters_to_binary(lists:reverse(Buffer)) | Output]. build_interpol(Line, Column, EndColumn, Buffer, Output) -> - [{{Line, Column, EndColumn}, lists:reverse(Buffer)} | Output]. + [{{Line, {Column, EndColumn}, nil}, lists:reverse(Buffer)} | Output]. diff --git a/lib/elixir/src/elixir_parser.yrl b/lib/elixir/src/elixir_parser.yrl index 151668299..bc3fb28e4 100644 --- a/lib/elixir/src/elixir_parser.yrl +++ b/lib/elixir/src/elixir_parser.yrl @@ -623,7 +623,7 @@ Erlang code. meta_from_token(Token, Counter) -> [{counter, Counter} | meta_from_token(Token)]. meta_from_token(Token) -> meta_from_location(?location(Token)). -meta_from_location({Line, Column, EndColumn}) +meta_from_location({Line, {Column, EndColumn}, _}) when is_integer(Line), is_integer(Column), is_integer(EndColumn) -> [{line, Line}]. 
%% Handle metadata in literals diff --git a/lib/elixir/src/elixir_tokenizer.erl b/lib/elixir/src/elixir_tokenizer.erl index 99f42836e..89f2eea9e 100644 --- a/lib/elixir/src/elixir_tokenizer.erl +++ b/lib/elixir/src/elixir_tokenizer.erl @@ -151,7 +151,7 @@ tokenize(String, Line, Opts) -> tokenize([], Line, Column, #elixir_tokenizer{terminators=[]}, Tokens) -> {ok, Line, Column, lists:reverse(Tokens)}; -tokenize([], EndLine, _Column, #elixir_tokenizer{terminators=[{Start, {StartLine, _, _}} | _]}, Tokens) -> +tokenize([], EndLine, _Column, #elixir_tokenizer{terminators=[{Start, {StartLine, {_, _}, _}} | _]}, Tokens) -> End = terminator(Start), Message = io_lib:format("missing terminator: ~ts (for \"~ts\" starting at line ~B)", [End, Start, StartLine]), {error, {EndLine, Message, []}, [], Tokens}; @@ -164,17 +164,32 @@ tokenize(("<<<<<<<" ++ _) = Original, Line, 1, _Scope, Tokens) -> % Base integers +% tokenize([$0, $x, H | T], Line, Column, Scope, Tokens) when ?is_hex(H) -> +% {Rest, Number, OriginalRepresentation, Length} = tokenize_hex(T, [H], 1), +% Token = {integer, {Line, {Column, Column + 2 + Length}, Number}, OriginalRepresentation}, +% tokenize(Rest, Line, Column + 2 + Length, Scope, [Token | Tokens]); +% +% tokenize([$0, $b, H | T], Line, Column, Scope, Tokens) when ?is_bin(H) -> +% {Rest, Number, OriginalRepresentation, Length} = tokenize_bin(T, [H], 1), +% Token = {integer, {Line, {Column, Column + 2 + Length}, Number}, OriginalRepresentation}, +% tokenize(Rest, Line, Column + 2 + Length, Scope, [Token | Tokens]); +% +% tokenize([$0, $o, H | T], Line, Column, Scope, Tokens) when ?is_octal(H) -> +% {Rest, Number, OriginalRepresentation, Length} = tokenize_octal(T, [H], 1), +% Token = {integer, {Line, {Column, Column + 2 + Length}, Number}, OriginalRepresentation}, +% tokenize(Rest, Line, Column + 2 + Length, Scope, [Token | Tokens]); + tokenize([$0, $x, H | T], Line, Column, Scope, Tokens) when ?is_hex(H) -> {Rest, Number, Length} = tokenize_hex(T, [H], 1), 
- tokenize(Rest, Line, Column + 2 + Length, Scope, [{hex, {Line, Column, Column + 2 + Length}, Number} | Tokens]); + tokenize(Rest, Line, Column + 2 + Length, Scope, [{hex, {Line, {Column, Column + 2 + Length}, nil}, Number} | Tokens]); tokenize([$0, $b, H | T], Line, Column, Scope, Tokens) when ?is_bin(H) -> {Rest, Number, Length} = tokenize_bin(T, [H], 1), - tokenize(Rest, Line, Column + 2 + Length, Scope, [{binary, {Line, Column, Column + 2 + Length}, Number} | Tokens]); + tokenize(Rest, Line, Column + 2 + Length, Scope, [{binary, {Line, {Column, Column + 2 + Length}, nil}, Number} | Tokens]); tokenize([$0, $o, H | T], Line, Column, Scope, Tokens) when ?is_octal(H) -> {Rest, Number, Length} = tokenize_octal(T, [H], 1), - tokenize(Rest, Line, Column + 2 + Length, Scope, [{octal, {Line, Column, Column + 2 + Length}, Number} | Tokens]); + tokenize(Rest, Line, Column + 2 + Length, Scope, [{octal, {Line, {Column, Column + 2 + Length}, nil}, Number} | Tokens]); % Comments @@ -182,7 +197,7 @@ tokenize([$# | String], Line, Column, Scope, Tokens) -> {Rest, Comment, Length} = tokenize_comment(String, [$#], 1), case Scope#elixir_tokenizer.preserve_comments of true -> - CommentToken = {comment, {Line, Column, Column + Length}, Comment}, + CommentToken = {comment, {Line, {Column, Column + Length}, nil}, Comment}, tokenize(Rest, Line, Column + Length, Scope, [CommentToken | Tokens]); false -> tokenize(Rest, Line, Column, Scope, Tokens) @@ -194,7 +209,8 @@ tokenize([$~, S, H, H, H | T] = Original, Line, Column, Scope, Tokens) when ?is_ case extract_heredoc_with_interpolation(Line, Column, Scope, ?is_downcase(S), T, H) of {ok, NewLine, NewColumn, Parts, Rest} -> {Final, Modifiers} = collect_modifiers(Rest, []), - tokenize(Final, NewLine, NewColumn, Scope, [{sigil, {Line, Column, NewColumn}, S, Parts, Modifiers, [H, H, H]} | Tokens]); + Token = {sigil, {Line, {Column, NewColumn}, nil}, S, Parts, Modifiers, [H, H, H]}, + tokenize(Final, NewLine, NewColumn, Scope, [Token | 
Tokens]); {error, Reason} -> {error, Reason, Original, Tokens} end; @@ -203,7 +219,8 @@ tokenize([$~, S, H | T] = Original, Line, Column, Scope, Tokens) when ?is_sigil( case elixir_interpolation:extract(Line, Column + 3, Scope, ?is_downcase(S), T, sigil_terminator(H)) of {NewLine, NewColumn, Parts, Rest} -> {Final, Modifiers} = collect_modifiers(Rest, []), - tokenize(Final, NewLine, NewColumn, Scope, [{sigil, {Line, Column, NewColumn}, S, Parts, Modifiers, [H]} | Tokens]); + Token = {sigil, {Line, {Column, NewColumn}, nil}, S, Parts, Modifiers, [H]}, + tokenize(Final, NewLine, NewColumn, Scope, [Token | Tokens]); {error, Reason} -> Sigil = [$~, S, H], interpolation_error(Reason, Original, Tokens, " (for sigil ~ts starting at line ~B)", [Sigil, Line]) @@ -226,7 +243,8 @@ tokenize([$~, S, H | _] = Original, Line, Column, _Scope, Tokens) when ?is_upcas tokenize([$?, $\\, H | T], Line, Column, Scope, Tokens) -> Char = elixir_interpolation:unescape_map(H), - tokenize(T, Line, Column + 3, Scope, [{char, {Line, Column, Column + 3}, Char} | Tokens]); + Token = {char, {Line, {Column, Column + 3}, nil}, Char}, + tokenize(T, Line, Column + 3, Scope, [Token | Tokens]); tokenize([$?, Char | T], Line, Column, Scope, Tokens) -> case handle_char(Char) of @@ -237,7 +255,8 @@ tokenize([$?, Char | T], Line, Column, Scope, Tokens) -> false -> ok end, - tokenize(T, Line, Column + 2, Scope, [{char, {Line, Column, Column + 2}, Char} | Tokens]); + Token = {char, {Line, {Column, Column + 2}, nil}, Char}, + tokenize(T, Line, Column + 2, Scope, [Token | Tokens]); % Heredocs @@ -257,45 +276,48 @@ tokenize([$' | T], Line, Column, Scope, Tokens) -> % Operator atoms tokenize("...:" ++ Rest, Line, Column, Scope, Tokens) when ?is_space(hd(Rest)) -> - tokenize(Rest, Line, Column + 4, Scope, [{kw_identifier, {Line, Column, Column + 4}, '...'} | Tokens]); + tokenize(Rest, Line, Column + 4, Scope, [{kw_identifier, {Line, {Column, Column + 4}, nil}, '...'} | Tokens]); tokenize("<<>>:" ++ Rest, Line, 
Column, Scope, Tokens) when ?is_space(hd(Rest)) -> - tokenize(Rest, Line, Column + 5, Scope, [{kw_identifier, {Line, Column, Column + 5}, '<<>>'} | Tokens]); + tokenize(Rest, Line, Column + 5, Scope, [{kw_identifier, {Line, {Column, Column + 5}, nil}, '<<>>'} | Tokens]); tokenize("%{}:" ++ Rest, Line, Column, Scope, Tokens) when ?is_space(hd(Rest)) -> - tokenize(Rest, Line, Column + 4, Scope, [{kw_identifier, {Line, Column, Column + 4}, '%{}'} | Tokens]); + tokenize(Rest, Line, Column + 4, Scope, [{kw_identifier, {Line, {Column, Column + 4}, nil}, '%{}'} | Tokens]); tokenize("%:" ++ Rest, Line, Column, Scope, Tokens) when ?is_space(hd(Rest)) -> - tokenize(Rest, Line, Column + 2, Scope, [{kw_identifier, {Line, Column, Column + 2}, '%'} | Tokens]); + tokenize(Rest, Line, Column + 2, Scope, [{kw_identifier, {Line, {Column, Column + 2}, nil}, '%'} | Tokens]); tokenize("{}:" ++ Rest, Line, Column, Scope, Tokens) when ?is_space(hd(Rest)) -> - tokenize(Rest, Line, Column + 3, Scope, [{kw_identifier, {Line, Column, Column + 3}, '{}'} | Tokens]); + tokenize(Rest, Line, Column + 3, Scope, [{kw_identifier, {Line, {Column, Column + 3}, nil}, '{}'} | Tokens]); tokenize(":..." 
++ Rest, Line, Column, Scope, Tokens) -> - tokenize(Rest, Line, Column + 4, Scope, [{atom, {Line, Column, Column + 4}, '...'} | Tokens]); + tokenize(Rest, Line, Column + 4, Scope, [{atom, {Line, {Column, Column + 4}, nil}, '...'} | Tokens]); tokenize(":<<>>" ++ Rest, Line, Column, Scope, Tokens) -> - tokenize(Rest, Line, Column + 5, Scope, [{atom, {Line, Column, Column + 5}, '<<>>'} | Tokens]); + tokenize(Rest, Line, Column + 5, Scope, [{atom, {Line, {Column, Column + 5}, nil}, '<<>>'} | Tokens]); tokenize(":%{}" ++ Rest, Line, Column, Scope, Tokens) -> - tokenize(Rest, Line, Column + 4, Scope, [{atom, {Line, Column, Column + 4}, '%{}'} | Tokens]); + tokenize(Rest, Line, Column + 4, Scope, [{atom, {Line, {Column, Column + 4}, nil}, '%{}'} | Tokens]); tokenize(":%" ++ Rest, Line, Column, Scope, Tokens) -> - tokenize(Rest, Line, Column + 2, Scope, [{atom, {Line, Column, Column + 2}, '%'} | Tokens]); + tokenize(Rest, Line, Column + 2, Scope, [{atom, {Line, {Column, Column + 2}, nil}, '%'} | Tokens]); tokenize(":{}" ++ Rest, Line, Column, Scope, Tokens) -> - tokenize(Rest, Line, Column + 3, Scope, [{atom, {Line, Column, Column + 3}, '{}'} | Tokens]); + tokenize(Rest, Line, Column + 3, Scope, [{atom, {Line, {Column, Column + 3}, nil}, '{}'} | Tokens]); % ## Three Token Operators tokenize([$:, T1, T2, T3 | Rest], Line, Column, Scope, Tokens) when ?unary_op3(T1, T2, T3); ?comp_op3(T1, T2, T3); ?and_op3(T1, T2, T3); ?or_op3(T1, T2, T3); ?arrow_op3(T1, T2, T3); ?three_op(T1, T2, T3) -> - tokenize(Rest, Line, Column + 4, Scope, [{atom, {Line, Column, Column + 4}, list_to_atom([T1, T2, T3])} | Tokens]); + Token = {atom, {Line, {Column, Column + 4}, nil}, list_to_atom([T1, T2, T3])}, + tokenize(Rest, Line, Column + 4, Scope, [Token | Tokens]); % ## Two Token Operators tokenize([$:, T1, T2 | Rest], Line, Column, Scope, Tokens) when ?comp_op2(T1, T2); ?rel_op2(T1, T2); ?and_op(T1, T2); ?or_op(T1, T2); ?arrow_op(T1, T2); ?in_match_op(T1, T2); ?two_op(T1, T2); ?stab_op(T1, T2); 
?type_op(T1, T2) -> - tokenize(Rest, Line, Column + 3, Scope, [{atom, {Line, Column, Column + 3}, list_to_atom([T1, T2])} | Tokens]); + Token = {atom, {Line, {Column, Column + 3}, nil}, list_to_atom([T1, T2])}, + tokenize(Rest, Line, Column + 3, Scope, [Token | Tokens]); % ## Single Token Operators tokenize([$:, T | Rest], Line, Column, Scope, Tokens) when ?at_op(T); ?unary_op(T); ?capture_op(T); ?dual_op(T); ?mult_op(T); ?rel_op(T); ?match_op(T); ?pipe_op(T); T == $. -> - tokenize(Rest, Line, Column + 2, Scope, [{atom, {Line, Column, Column + 2}, list_to_atom([T])} | Tokens]); + Token = {atom, {Line, {Column, Column + 2}, nil}, list_to_atom([T])}, + tokenize(Rest, Line, Column + 2, Scope, [Token | Tokens]); % Stand-alone tokens @@ -304,7 +326,7 @@ tokenize("..." ++ Rest, Line, Column, Scope, Tokens) -> tokenize(Rest, Line, Column + 3, Scope, [Token | Tokens]); tokenize("=>" ++ Rest, Line, Column, Scope, Tokens) -> - tokenize(Rest, Line, Column + 2, Scope, add_token_with_nl({assoc_op, {Line, Column, Column + 2}, '=>'}, Tokens)); + tokenize(Rest, Line, Column + 2, Scope, add_token_with_nl({assoc_op, {Line, {Column, Column + 2}, nil}, '=>'}, Tokens)); % ## Three token operators tokenize([T1, T2, T3 | Rest], Line, Column, Scope, Tokens) when ?unary_op3(T1, T2, T3) -> @@ -327,12 +349,12 @@ tokenize([T1, T2, T3 | Rest], Line, Column, Scope, Tokens) when ?arrow_op3(T1, T % ## Containers + punctuation tokens tokenize([T, T | Rest], Line, Column, Scope, Tokens) when T == $<; T == $> -> - Token = {list_to_atom([T, T]), {Line, Column, Column + 2}}, + Token = {list_to_atom([T, T]), {Line, {Column, Column + 2}, nil}}, handle_terminator(Rest, Line, Column + 2, Scope, Token, Tokens); tokenize([T | Rest], Line, Column, Scope, Tokens) when T == $(; T == ${; T == $}; T == $[; T == $]; T == $); T == $, -> - Token = {list_to_atom([T]), {Line, Column, Column + 1}}, + Token = {list_to_atom([T]), {Line, {Column, Column + 1}, nil}}, handle_terminator(Rest, Line, Column + 1, Scope, Token, 
Tokens); % ## Two Token Operators @@ -399,7 +421,8 @@ tokenize([$:, H | T] = Original, Line, Column, Scope, Tokens) when ?is_quote(H) true -> atom_safe; false -> atom_unsafe end, - tokenize(Rest, NewLine, NewColumn, Scope, [{Key, {Line, Column, NewColumn}, Unescaped} | Tokens]); + Token = {Key, {Line, {Column, NewColumn}, nil}, Unescaped}, + tokenize(Rest, NewLine, NewColumn, Scope, [Token | Tokens]); {error, Reason} -> interpolation_error(Reason, Original, Tokens, " (for atom starting at line ~B)", [Line]) end; @@ -407,7 +430,8 @@ tokenize([$:, H | T] = Original, Line, Column, Scope, Tokens) when ?is_quote(H) tokenize([$: | String] = Original, Line, Column, Scope, Tokens) -> case tokenize_identifier(String, Line, Scope) of {_Kind, Atom, Rest, Length, _Ascii, _Special} -> - tokenize(Rest, Line, Column + 1 + Length, Scope, [{atom, {Line, Column, Column + 1 + Length}, Atom} | Tokens]); + Token = {atom, {Line, {Column, Column + 1 + Length}, nil}, Atom}, + tokenize(Rest, Line, Column + 1 + Length, Scope, [Token | Tokens]); empty -> unexpected_token(Original, Line, Column, Tokens); {error, Reason} -> @@ -421,9 +445,11 @@ tokenize([H | T], Line, Column, Scope, Tokens) when ?is_digit(H) -> {error, Reason, Number} -> {error, {Line, Reason, Number}, T, Tokens}; {Rest, Number, Length} when is_integer(Number) -> - tokenize(Rest, Line, Column + Length, Scope, [{decimal, {Line, Column, Column + Length}, Number} | Tokens]); + Token = {decimal, {Line, {Column, Column + Length}, nil}, Number}, + tokenize(Rest, Line, Column + Length, Scope, [Token | Tokens]); {Rest, Number, Length} -> - tokenize(Rest, Line, Column + Length, Scope, [{float, {Line, Column, Column + Length}, Number} | Tokens]) + Token = {float, {Line, {Column, Column + Length}, nil}, Number}, + tokenize(Rest, Line, Column + Length, Scope, [Token | Tokens]) end; % Spaces @@ -435,10 +461,10 @@ tokenize([T | Rest], Line, Column, Scope, Tokens) when ?is_horizontal_space(T) - % End of line tokenize(";" ++ Rest, Line, 
Column, Scope, []) -> - tokenize(Rest, Line, Column + 1, Scope, [{';', {Line, Column, Column + 1}}]); + tokenize(Rest, Line, Column + 1, Scope, [{';', {Line, {Column, Column + 1}, nil}}]); tokenize(";" ++ Rest, Line, Column, Scope, [Top | _] = Tokens) when element(1, Top) /= ';' -> - tokenize(Rest, Line, Column + 1, Scope, [{';', {Line, Column, Column + 1}} | Tokens]); + tokenize(Rest, Line, Column + 1, Scope, [{';', {Line, {Column, Column + 1}, nil}} | Tokens]); tokenize("\\" = Original, Line, _Column, _Scope, Tokens) -> {error, {Line, "invalid escape \\ at end of file", []}, Original, Tokens}; @@ -464,10 +490,10 @@ tokenize("\r\n" ++ Rest, Line, Column, Scope, Tokens) -> % Others tokenize([$%, ${ | T], Line, Column, Scope, Tokens) -> - tokenize([${ | T], Line, Column + 1, Scope, [{'%{}', {Line, Column, Column + 1}} | Tokens]); + tokenize([${ | T], Line, Column + 1, Scope, [{'%{}', {Line, {Column, Column + 1}, nil}} | Tokens]); tokenize([$% | T], Line, Column, Scope, Tokens) -> - tokenize(T, Line, Column + 1, Scope, [{'%', {Line, Column, Column + 1}} | Tokens]); + tokenize(T, Line, Column + 1, Scope, [{'%', {Line, {Column, Column + 1}, nil}} | Tokens]); tokenize([$. 
| T], Line, Column, Scope, Tokens) -> {Rest, Counter, Offset, CommentTokens} = strip_dot_space(T, 0, Column + 1, Line, []), @@ -482,7 +508,8 @@ tokenize(String, Line, Column, Scope, Tokens) -> case Rest of [$: | T] when ?is_space(hd(T)) -> - tokenize(T, Line, Column + Length + 1, Scope, [{kw_identifier, {Line, Column, Column + Length + 1}, Atom} | Tokens]); + Token = {kw_identifier, {Line, {Column, Column + Length + 1}, nil}, Atom}, + tokenize(T, Line, Column + Length + 1, Scope, [Token | Tokens]); [$: | T] when hd(T) /= $: -> AtomName = atom_to_list(Atom) ++ [$:], Reason = {Line, "keyword argument must be followed by space after: ", AtomName}, @@ -519,7 +546,7 @@ strip_dot_space(T, Counter, Column, StartLine, Tokens) -> case strip_horizontal_space(T) of {"#" ++ R, _} -> {Rest, Comment, Length} = tokenize_comment(R, [$#], 1), - CommentToken = {comment, {StartLine + Counter, Column, Column + Length}, Comment}, + CommentToken = {comment, {StartLine + Counter, {Column, Column + Length}, nil}, Comment}, strip_dot_space(Rest, Counter, 1, StartLine, [CommentToken | Tokens]); {"\r\n" ++ Rest, _} -> strip_dot_space(Rest, Counter + 1, 1, StartLine, Tokens); @@ -546,7 +573,7 @@ handle_char(_) -> false. 
handle_heredocs(T, Line, Column, H, Scope, Tokens) -> case extract_heredoc_with_interpolation(Line, Column, Scope, true, T, H) of {ok, NewLine, NewColumn, Parts, Rest} -> - Token = {heredoc_type(H), {Line, Column, NewColumn}, unescape_tokens(Parts)}, + Token = {heredoc_type(H), {Line, {Column, NewColumn}, nil}, unescape_tokens(Parts)}, tokenize(Rest, NewLine, NewColumn, Scope, [Token | Tokens]); {error, Reason} -> {error, Reason, [H, H, H] ++ T, Tokens} @@ -562,37 +589,39 @@ handle_strings(T, Line, Column, H, Scope, Tokens) -> true -> kw_identifier_safe; false -> kw_identifier_unsafe end, - tokenize(Rest, NewLine, NewColumn, Scope, [{Key, {Line, Column - 1, NewColumn}, Unescaped} | Tokens]); + Token = {Key, {Line, {Column - 1, NewColumn}, nil}, Unescaped}, + tokenize(Rest, NewLine, NewColumn, Scope, [Token | Tokens]); {NewLine, NewColumn, Parts, Rest} -> - Token = {string_type(H), {Line, Column - 1, NewColumn}, unescape_tokens(Parts)}, + Token = {string_type(H), {Line, {Column - 1, NewColumn}, nil}, unescape_tokens(Parts)}, tokenize(Rest, NewLine, NewColumn, Scope, [Token | Tokens]) end. 
handle_unary_op([$: | Rest], Line, Column, _Kind, Length, Op, Scope, Tokens) when ?is_space(hd(Rest)) -> - tokenize(Rest, Line, Column + Length + 1, Scope, [{kw_identifier, {Line, Column, Column + Length}, Op} | Tokens]); + Token = {kw_identifier, {Line, {Column, Column + Length}, nil}, Op}, + tokenize(Rest, Line, Column + Length + 1, Scope, [Token | Tokens]); handle_unary_op(Rest, Line, Column, Kind, Length, Op, Scope, Tokens) -> case strip_horizontal_space(Rest) of {[$/ | _] = Remaining, Extra} -> - tokenize(Remaining, Line, Column + Length + Extra, Scope, - [{identifier, {Line, Column, Column + Length}, Op} | Tokens]); + Token = {identifier, {Line, {Column, Column + Length}, nil}, Op}, + tokenize(Remaining, Line, Column + Length + Extra, Scope, [Token | Tokens]); {Remaining, Extra} -> - tokenize(Remaining, Line, Column + Length + Extra, Scope, - [{Kind, {Line, Column, Column + Length}, Op} | Tokens]) + Token = {Kind, {Line, {Column, Column + Length}, nil}, Op}, + tokenize(Remaining, Line, Column + Length + Extra, Scope, [Token | Tokens]) end. 
handle_op([$: | Rest], Line, Column, _Kind, Length, Op, Scope, Tokens) when ?is_space(hd(Rest)) -> - tokenize(Rest, Line, Column + Length + 1, Scope, - [{kw_identifier, {Line, Column, Column + Length}, Op} | Tokens]); + Token = {kw_identifier, {Line, {Column, Column + Length}, nil}, Op}, + tokenize(Rest, Line, Column + Length + 1, Scope, [Token | Tokens]); handle_op(Rest, Line, Column, Kind, Length, Op, Scope, Tokens) -> case strip_horizontal_space(Rest) of {[$/ | _] = Remaining, Extra} -> - tokenize(Remaining, Line, Column + Length + Extra, Scope, - [{identifier, {Line, Column, Column + Length}, Op} | Tokens]); + Token = {identifier, {Line, {Column, Column + Length}, nil}, Op}, + tokenize(Remaining, Line, Column + Length + Extra, Scope, [Token | Tokens]); {Remaining, Extra} -> - tokenize(Remaining, Line, Column + Length + Extra, Scope, - add_token_with_nl({Kind, {Line, Column, Column + Length}, Op}, Tokens)) + Token = {Kind, {Line, {Column, Column + Length}, nil}, Op}, + tokenize(Remaining, Line, Column + Length + Extra, Scope, add_token_with_nl(Token, Tokens)) end. 
handle_comments(CommentTokens, Tokens, Scope) -> @@ -622,7 +651,7 @@ handle_dot([$., T | Rest], Line, Column, DotColumn, Scope, Tokens, CommentTokens % ## Exception for .( as it needs to be treated specially in the parser handle_dot([$., $( | Rest], Line, Column, DotColumn, Scope, Tokens, CommentTokens, Counter) -> - TokensSoFar = add_token_with_nl({dot_call_op, {Line, DotColumn, DotColumn + 1}, '.'}, Tokens), + TokensSoFar = add_token_with_nl({dot_call_op, {Line, {DotColumn, DotColumn + 1}, nil}, '.'}, Tokens), tokenize([$( | Rest], Line + Counter, Column + 2, Scope, handle_comments(CommentTokens, TokensSoFar, Scope)); handle_dot([$., H | T] = Original, Line, Column, DotColumn, Scope, Tokens, CommentTokens, Counter) when ?is_quote(H) -> @@ -631,7 +660,7 @@ handle_dot([$., H | T] = Original, Line, Column, DotColumn, Scope, Tokens, Comme case unsafe_to_atom(Part, Line, Scope) of {ok, Atom} -> Token = check_call_identifier(Line + Counter, Column, max(NewColumn - Column, 0), Atom, Rest), - TokensSoFar = add_token_with_nl({'.', {Line, DotColumn, DotColumn + 1}}, Tokens), + TokensSoFar = add_token_with_nl({'.', {Line, {DotColumn, DotColumn + 1}, nil}}, Tokens), tokenize(Rest, NewLine, NewColumn, Scope, [Token | handle_comments(CommentTokens, TokensSoFar, Scope)]); {error, Reason} -> {error, Reason, Original, Tokens} @@ -641,12 +670,12 @@ handle_dot([$., H | T] = Original, Line, Column, DotColumn, Scope, Tokens, Comme end; handle_dot([$. | Rest], Line, Column, DotColumn, Scope, Tokens, CommentTokens, Counter) -> - TokensSoFar = add_token_with_nl({'.', {Line, DotColumn, DotColumn + 1}}, Tokens), + TokensSoFar = add_token_with_nl({'.', {Line, {DotColumn, DotColumn + 1}, nil}}, Tokens), tokenize(Rest, Line + Counter, Column + 1, Scope, handle_comments(CommentTokens, TokensSoFar, Scope)). 
handle_call_identifier(Rest, Line, Column, DotColumn, Length, Op, Scope, Tokens, CommentTokens, Counter) -> - {_, {NewLine, _, NewColumn}, _} = Token = check_call_identifier(Line + Counter, Column, Length, Op, Rest), - TokensSoFar = add_token_with_nl({'.', {Line, DotColumn, DotColumn + 1}}, Tokens), + {_, {NewLine, {_, NewColumn}, _}, _} = Token = check_call_identifier(Line + Counter, Column, Length, Op, Rest), + TokensSoFar = add_token_with_nl({'.', {Line, {DotColumn, DotColumn + 1}, nil}}, Tokens), tokenize(Rest, NewLine, NewColumn, Scope, [Token | handle_comments(CommentTokens, TokensSoFar, Scope)]). % ## Ambiguous unary/binary operators tokens @@ -657,7 +686,8 @@ handle_space_sensitive_tokens([Sign, NotMarker | T], Line, Column, Scope, [{Iden NotMarker /= $%, NotMarker /= $+, NotMarker /= $-, NotMarker /= $/, NotMarker /= $>, %% operators Identifier == identifier -> Rest = [NotMarker | T], - tokenize(Rest, Line, Column + 1, Scope, [{dual_op, {Line, Column, Column + 1}, list_to_atom([Sign])}, setelement(1, H, op_identifier) | Tokens]); + DualOpToken = {dual_op, {Line, {Column, Column + 1}, nil}, list_to_atom([Sign])}, + tokenize(Rest, Line, Column + 1, Scope, [DualOpToken, setelement(1, H, op_identifier) | Tokens]); handle_space_sensitive_tokens(String, Line, Column, Scope, Tokens) -> tokenize(String, Line, Column, Scope, Tokens). @@ -667,7 +697,7 @@ handle_space_sensitive_tokens(String, Line, Column, Scope, Tokens) -> eol(_Line, _Column, [{';', _} | _] = Tokens) -> Tokens; eol(_Line, _Column, [{',', _} | _] = Tokens) -> Tokens; eol(_Line, _Column, [{eol, _} | _] = Tokens) -> Tokens; -eol(Line, Column, Tokens) -> [{eol, {Line, Column, Column + 1}} | Tokens]. +eol(Line, Column, Tokens) -> [{eol, {Line, {Column, Column + 1}, nil}} | Tokens]. 
unsafe_to_atom(Part, Line, #elixir_tokenizer{}) when is_binary(Part) andalso size(Part) > 255; @@ -925,16 +955,17 @@ tokenize_alias(Rest, Line, Column, Atom, Length, Ascii, Special, Scope, Tokens) Reason = {Line, invalid_character_error("alias", hd(Special)), AtomName}, {error, Reason, AtomName ++ Rest, Tokens}; true -> - tokenize(Rest, Line, Column + Length, Scope, [{aliases, {Line, Column, Column + Length}, [Atom]} | Tokens]) + AliasesToken = {aliases, {Line, {Column, Column + Length}, nil}, [Atom]}, + tokenize(Rest, Line, Column + Length, Scope, [AliasesToken | Tokens]) end. tokenize_other(Rest, Line, Column, Atom, Length, Scope, Tokens) -> case tokenize_keyword_or_identifier(Rest, Line, Column, Length, Atom, Tokens) of - {keyword, Rest, {_, {_, _, EndColumn}} = Check, T} -> + {keyword, Rest, {_, {_, {_, EndColumn}, _}} = Check, T} -> handle_terminator(Rest, Line, EndColumn, Scope, Check, T); - {keyword, Rest, {_, {_, _, EndColumn}, _} = Check, T} -> + {keyword, Rest, {_, {_, {_, EndColumn}, _}, _} = Check, T} -> handle_terminator(Rest, Line, EndColumn, Scope, Check, T); - {identifier, Rest, {_, {_, _, EndColumn}, _} = Token} -> + {identifier, Rest, {_, {_, {_, EndColumn}, _}, _} = Token} -> tokenize(Rest, Line, EndColumn, Scope, [Token | Tokens]); {error, _, _, _} = Error -> Error @@ -944,8 +975,8 @@ tokenize_keyword_or_identifier(Rest, Line, Column, Length, Atom, Tokens) -> case check_keyword(Line, Column, Length, Atom, Tokens, Rest) of nomatch -> {identifier, Rest, check_call_identifier(Line, Column, Length, Atom, Rest)}; - {ok, [{in_op, {_, _, InEndColumn}, in} | [{unary_op, {NotLine, NotColumn, _}, 'not'} | T]]} -> - {keyword, Rest, {in_op, {NotLine, NotColumn, InEndColumn}, 'not in'}, T}; + {ok, [{in_op, {_, {_, InEndColumn}, _}, in} | [{unary_op, {NotLine, {NotColumn, _}, _}, 'not'} | T]]} -> + {keyword, Rest, {in_op, {NotLine, {NotColumn, InEndColumn}, nil}, 'not in'}, T}; {ok, [Check | T]} -> {keyword, Rest, Check, T}; {error, Message, Token} -> @@ 
-955,11 +986,11 @@ tokenize_keyword_or_identifier(Rest, Line, Column, Length, Atom, Tokens) -> %% Check if it is a call identifier (paren | bracket | do) check_call_identifier(Line, Column, Length, Atom, [$( | _]) -> - {paren_identifier, {Line, Column, Column + Length}, Atom}; + {paren_identifier, {Line, {Column, Column + Length}, nil}, Atom}; check_call_identifier(Line, Column, Length, Atom, [$[ | _]) -> - {bracket_identifier, {Line, Column, Column + Length}, Atom}; + {bracket_identifier, {Line, {Column, Column + Length}, nil}, Atom}; check_call_identifier(Line, Column, Length, Atom, _Rest) -> - {identifier, {Line, Column, Column + Length}, Atom}. + {identifier, {Line, {Column, Column + Length}, nil}, Atom}. add_token_with_nl({unary_op, _, _} = Left, T) -> [Left | T]; add_token_with_nl(Left, [{eol, _} | T]) -> [Left | T]; @@ -1011,7 +1042,7 @@ check_terminator({E, _}, [{S, _} | Terminators]) when S == '<<', E == '>>' -> Terminators; -check_terminator({E, {Line, _, _}}, [{Start, {StartLine, _, _}} | _]) when +check_terminator({E, {Line, _, _}}, [{Start, {StartLine, {_, _}, _}} | _]) when E == 'end'; E == ')'; E == ']'; E == '}'; E == '>>' -> End = terminator(Start), MessagePrefix = io_lib:format("\"~ts\" is missing terminator \"~ts\". unexpected token: \"", @@ -1050,14 +1081,14 @@ terminator('<<') -> '>>'. 
check_keyword(_Line, _Column, _Length, _Atom, [{'.', _} | _], _Rest) -> nomatch; check_keyword(DoLine, DoColumn, _Length, do, - [{Identifier, {Line, Column, EndColumn}, Atom} | T], _Rest) when Identifier == identifier -> - {ok, add_token_with_nl({do, {DoLine, DoColumn, DoColumn + 2}}, - [{do_identifier, {Line, Column, EndColumn}, Atom} | T])}; + [{Identifier, {Line, {Column, EndColumn}, Meta}, Atom} | T], _Rest) when Identifier == identifier -> + {ok, add_token_with_nl({do, {DoLine, {DoColumn, DoColumn + 2}, nil}}, + [{do_identifier, {Line, {Column, EndColumn}, Meta}, Atom} | T])}; check_keyword(_Line, _Column, _Length, do, [{'fn', _} | _], _Rest) -> {error, do_with_fn_error("unexpected token \"do\""), "do"}; check_keyword(Line, Column, _Length, do, Tokens, _Rest) -> case do_keyword_valid(Tokens) of - true -> {ok, add_token_with_nl({do, {Line, Column, Column + 2}}, Tokens)}; + true -> {ok, add_token_with_nl({do, {Line, {Column, Column + 2}, nil}}, Tokens)}; false -> {error, invalid_do_error("unexpected token \"do\""), "do"} end; check_keyword(Line, Column, Length, Atom, Tokens, Rest) -> @@ -1065,15 +1096,15 @@ check_keyword(Line, Column, Length, Atom, Tokens, Rest) -> false -> nomatch; token -> - {ok, [{Atom, {Line, Column, Column + Length}} | Tokens]}; + {ok, [{Atom, {Line, {Column, Column + Length}, nil}} | Tokens]}; block -> - {ok, [{block_identifier, {Line, Column, Column + Length}, Atom} | Tokens]}; + {ok, [{block_identifier, {Line, {Column, Column + Length}, nil}, Atom} | Tokens]}; Kind -> case strip_horizontal_space(Rest) of {[$/ | _], _} -> - {ok, [{identifier, {Line, Column, Column + Length}, Atom} | Tokens]}; + {ok, [{identifier, {Line, {Column, Column + Length}, nil}, Atom} | Tokens]}; _ -> - {ok, add_token_with_nl({Kind, {Line, Column, Column + Length}, Atom}, Tokens)} + {ok, add_token_with_nl({Kind, {Line, {Column, Column + Length}, nil}, Atom}, Tokens)} end end. 
diff --git a/lib/elixir/test/erlang/string_test.erl b/lib/elixir/test/erlang/string_test.erl index 8ec706bf6..ca209cd7d 100644 --- a/lib/elixir/test/erlang/string_test.erl +++ b/lib/elixir/test/erlang/string_test.erl @@ -27,51 +27,51 @@ extract_interpolations_with_escaped_interpolation_test() -> extract_interpolations_with_interpolation_test() -> [<<"f">>, - {{1, 2, 7}, [{atom, {1, 4, 6}, o}]}, + {{1, {2, 7}, nil}, [{atom, {1, {4, 6}, nil}, o}]}, <<"o">>] = extract_interpolations("f#{:o}o"). extract_interpolations_with_two_interpolations_test() -> [<<"f">>, - {{1, 2, 7}, [{atom, {1, 4, 6}, o}]}, {{1, 7, 12}, [{atom, {1, 9, 11}, o}]}, + {{1, {2, 7}, nil}, [{atom, {1, {4, 6}, nil}, o}]}, {{1, {7, 12}, nil}, [{atom, {1, {9, 11}, nil}, o}]}, <<"o">>] = extract_interpolations("f#{:o}#{:o}o"). extract_interpolations_with_only_two_interpolations_test() -> - [{{1, 1, 6}, [{atom, {1, 3, 5}, o}]}, - {{1, 6, 11}, [{atom, {1, 8, 10}, o}]}] = extract_interpolations("#{:o}#{:o}"). + [{{1, {1, 6}, nil}, [{atom, {1, {3, 5}, nil}, o}]}, + {{1, {6, 11}, nil}, [{atom, {1, {8, 10}, nil}, o}]}] = extract_interpolations("#{:o}#{:o}"). extract_interpolations_with_tuple_inside_interpolation_test() -> [<<"f">>, - {{1, 2, 8}, [{'{', {1, 4, 5}}, {decimal, {1, 5, 6}, 1}, {'}', {1, 6, 7}}]}, + {{1, {2, 8}, nil}, [{'{', {1, {4, 5}, nil}}, {decimal, {1, {5, 6}, nil}, 1}, {'}', {1, {6, 7}, nil}}]}, <<"o">>] = extract_interpolations("f#{{1}}o"). extract_interpolations_with_many_expressions_inside_interpolation_test() -> [<<"f">>, - {{1, 2, 3}, [{decimal, {1, 4, 5}, 1}, {eol, {1, 5, 6}}, {decimal, {2, 1, 2}, 2}]}, + {{1, {2, 3}, nil}, [{decimal, {1, {4, 5}, nil}, 1}, {eol, {1, {5, 6}, nil}}, {decimal, {2, {1, 2}, nil}, 2}]}, <<"o">>] = extract_interpolations("f#{1\n2}o"). 
extract_interpolations_with_right_curly_inside_string_inside_interpolation_test() -> [<<"f">>, - {{1, 2, 10}, [{bin_string, {1, 4, 9}, [<<"f}o">>]}]}, + {{1, {2, 10}, nil}, [{bin_string, {1, {4, 9}, nil}, [<<"f}o">>]}]}, <<"o">>] = extract_interpolations("f#{\"f}o\"}o"). extract_interpolations_with_left_curly_inside_string_inside_interpolation_test() -> [<<"f">>, - {{1, 2, 10}, [{bin_string, {1, 4, 9}, [<<"f{o">>]}]}, + {{1, {2, 10}, nil}, [{bin_string, {1, {4, 9}, nil}, [<<"f{o">>]}]}, <<"o">>] = extract_interpolations("f#{\"f{o\"}o"). extract_interpolations_with_escaped_quote_inside_string_inside_interpolation_test() -> [<<"f">>, - {{1, 2, 11}, [{bin_string, {1, 4, 10}, [<<"f\"o">>]}]}, + {{1, {2, 11}, nil}, [{bin_string, {1, {4, 10}, nil}, [<<"f\"o">>]}]}, <<"o">>] = extract_interpolations("f#{\"f\\\"o\"}o"). extract_interpolations_with_less_than_operation_inside_interpolation_test() -> [<<"f">>, - {{1, 2, 8}, [{decimal, {1, 4, 5}, 1}, {rel_op, {1, 5, 6}, '<'}, {decimal, {1, 6, 7}, 2}]}, + {{1, {2, 8}, nil}, [{decimal, {1, {4, 5}, nil}, 1}, {rel_op, {1, {5, 6}, nil}, '<'}, {decimal, {1, {6, 7}, nil}, 2}]}, <<"o">>] = extract_interpolations("f#{1<2}o"). extract_interpolations_with_an_escaped_character_test() -> [<<"f">>, - {{1, 2, 17}, [{char, {1, 4, 7}, 7}, {rel_op, {1, 8, 9}, '>'}, {char, {1, 10, 13}, 7}]} + {{1, {2, 17}, nil}, [{char, {1, {4, 7}, nil}, 7}, {rel_op, {1, {8, 9}, nil}, '>'}, {char, {1, {10, 13}, nil}, 7}]} ] = extract_interpolations("f#{?\\a > ?\\a }"). extract_interpolations_with_invalid_expression_inside_interpolation_test() -> diff --git a/lib/elixir/test/erlang/tokenizer_test.erl b/lib/elixir/test/erlang/tokenizer_test.erl index da8c97474..94e522e1c 100644 --- a/lib/elixir/test/erlang/tokenizer_test.erl +++ b/lib/elixir/test/erlang/tokenizer_test.erl @@ -13,193 +13,210 @@ tokenize_error(String) -> Error. 
type_test() -> - [{decimal, {1, 1, 2}, 1}, {type_op, {1, 3, 5}, '::'}, {decimal, {1, 6, 7}, 3}] = tokenize("1 :: 3"), - [{identifier, {1, 1, 5}, name}, - {'.', {1, 5, 6}}, - {paren_identifier, {1, 6, 8}, '::'}, - {'(', {1, 8, 9}}, - {decimal, {1, 9, 10}, 3}, - {')', {1, 10, 11}}] = tokenize("name.::(3)"). + [{decimal, {1, {1, 2}, nil}, 1}, + {type_op, {1, {3, 5}, nil}, '::'}, + {decimal, {1, {6, 7}, nil}, 3}] = tokenize("1 :: 3"), + [{identifier, {1, {1, 5}, nil}, name}, + {'.', {1, {5, 6}, nil}}, + {paren_identifier, {1, {6, 8}, nil}, '::'}, + {'(', {1, {8, 9}, nil}}, + {decimal, {1, {9, 10}, nil}, 3}, + {')', {1, {10, 11}, nil}}] = tokenize("name.::(3)"). arithmetic_test() -> - [{decimal, {1, 1, 2}, 1}, {dual_op, {1, 3, 4}, '+'}, {decimal, {1, 5, 6}, 2}, {dual_op, {1, 7, 8}, '+'}, {decimal, {1, 9, 10}, 3}] = tokenize("1 + 2 + 3"). + [{decimal, {1, {1, 2}, nil}, 1}, + {dual_op, {1, {3, 4}, nil}, '+'}, + {decimal, {1, {5, 6}, nil}, 2}, + {dual_op, {1, {7, 8}, nil}, '+'}, + {decimal, {1, {9, 10}, nil}, 3}] = tokenize("1 + 2 + 3"). op_kw_test() -> - [{atom, {1, 1, 5}, foo}, {dual_op, {1, 5, 6}, '+'}, {atom, {1, 6, 10}, bar}] = tokenize(":foo+:bar"). + [{atom, {1, {1, 5}, nil}, foo}, + {dual_op, {1, {5, 6}, nil}, '+'}, + {atom, {1, {6, 10}, nil}, bar}] = tokenize(":foo+:bar"). scientific_test() -> - [{float, {1, 1, 7}, 0.1}] = tokenize("1.0e-1"), - [{float, {1, 1, 16}, 1.2345678e-7}] = tokenize("1_234.567_8e-10"), + [{float, {1, {1, 7}, nil}, 0.1}] = tokenize("1.0e-1"), + [{float, {1, {1, 16}, nil}, 1.2345678e-7}] = tokenize("1_234.567_8e-10"), {1, "invalid float number ", "1.0e309"} = tokenize_error("1.0e309"). hex_bin_octal_test() -> - [{hex, {1, 1, 5}, 255}] = tokenize("0xFF"), - [{hex, {1, 1, 6}, 255}] = tokenize("0xF_F"), - [{octal, {1, 1, 5}, 63}] = tokenize("0o77"), - [{octal, {1, 1, 6}, 63}] = tokenize("0o7_7"), - [{binary, {1, 1, 5}, 3}] = tokenize("0b11"), - [{binary, {1, 1, 6}, 3}] = tokenize("0b1_1"). 
+ [{hex, {1, {1, 5}, nil}, 255}] = tokenize("0xFF"), + [{hex, {1, {1, 6}, nil}, 255}] = tokenize("0xF_F"), + [{octal, {1, {1, 5}, nil}, 63}] = tokenize("0o77"), + [{octal, {1, {1, 6}, nil}, 63}] = tokenize("0o7_7"), + [{binary, {1, {1, 5}, nil}, 3}] = tokenize("0b11"), + [{binary, {1, {1, 6}, nil}, 3}] = tokenize("0b1_1"). unquoted_atom_test() -> - [{atom, {1, 1, 3}, '+'}] = tokenize(":+"), - [{atom, {1, 1, 3}, '-'}] = tokenize(":-"), - [{atom, {1, 1, 3}, '*'}] = tokenize(":*"), - [{atom, {1, 1, 3}, '/'}] = tokenize(":/"), - [{atom, {1, 1, 3}, '='}] = tokenize(":="), - [{atom, {1, 1, 4}, '&&'}] = tokenize(":&&"). + [{atom, {1, {1, 3}, nil}, '+'}] = tokenize(":+"), + [{atom, {1, {1, 3}, nil}, '-'}] = tokenize(":-"), + [{atom, {1, {1, 3}, nil}, '*'}] = tokenize(":*"), + [{atom, {1, {1, 3}, nil}, '/'}] = tokenize(":/"), + [{atom, {1, {1, 3}, nil}, '='}] = tokenize(":="), + [{atom, {1, {1, 4}, nil}, '&&'}] = tokenize(":&&"). quoted_atom_test() -> - [{atom_unsafe, {1, 1, 11}, [<<"foo bar">>]}] = tokenize(":\"foo bar\""). + [{atom_unsafe, {1, {1, 11}, nil}, [<<"foo bar">>]}] = tokenize(":\"foo bar\""). oversized_atom_test() -> OversizedAtom = [$: | string:copies("a", 256)], {1, "atom length must be less than system limit", ":"} = tokenize_error(OversizedAtom). op_atom_test() -> - [{atom, {1, 1, 6}, f0_1}] = tokenize(":f0_1"). + [{atom, {1, {1, 6}, nil}, f0_1}] = tokenize(":f0_1"). kw_test() -> - [{kw_identifier, {1, 1, 4}, do}] = tokenize("do: "), - [{kw_identifier, {1, 1, 4}, a@}] = tokenize("a@: "), - [{kw_identifier, {1, 1, 4}, 'A@'}] = tokenize("A@: "), - [{kw_identifier, {1, 1, 5}, a@b}] = tokenize("a@b: "), - [{kw_identifier, {1, 1, 5}, 'A@!'}] = tokenize("A@!: "), - [{kw_identifier, {1, 1, 5}, 'a@!'}] = tokenize("a@!: "), - [{kw_identifier_unsafe, {1, 1, 10}, [<<"foo bar">>]}] = tokenize("\"foo bar\": "). 
+ [{kw_identifier, {1, {1, 4}, nil}, do}] = tokenize("do: "), + [{kw_identifier, {1, {1, 4}, nil}, a@}] = tokenize("a@: "), + [{kw_identifier, {1, {1, 4}, nil}, 'A@'}] = tokenize("A@: "), + [{kw_identifier, {1, {1, 5}, nil}, a@b}] = tokenize("a@b: "), + [{kw_identifier, {1, {1, 5}, nil}, 'A@!'}] = tokenize("A@!: "), + [{kw_identifier, {1, {1, 5}, nil}, 'a@!'}] = tokenize("a@!: "), + [{kw_identifier_unsafe, {1, {1, 10}, nil}, [<<"foo bar">>]}] = tokenize("\"foo bar\": "). integer_test() -> - [{decimal, {1, 1, 4}, 123}] = tokenize("123"), - [{decimal, {1, 1, 4}, 123}, {';', {1, 4, 5}}] = tokenize("123;"), - [{eol, {1, 1, 2}}, {decimal, {3, 1, 4}, 123}] = tokenize("\n\n123"), - [{decimal, {1, 3, 6}, 123}, {decimal, {1, 8, 11}, 234}] = tokenize(" 123 234 "). + [{decimal, {1, {1, 4}, nil}, 123}] = tokenize("123"), + [{decimal, {1, {1, 4}, nil}, 123}, {';', {1, {4, 5}, nil}}] = tokenize("123;"), + [{eol, {1, {1, 2}, nil}}, {decimal, {3, {1, 4}, nil}, 123}] = tokenize("\n\n123"), + [{decimal, {1, {3, 6}, nil}, 123}, {decimal, {1, {8, 11}, nil}, 234}] = tokenize(" 123 234 "). float_test() -> - [{float, {1, 1, 5}, 12.3}] = tokenize("12.3"), - [{float, {1, 1, 5}, 12.3}, {';', {1, 5, 6}}] = tokenize("12.3;"), - [{eol, {1, 1, 2}}, {float, {3, 1, 5}, 12.3}] = tokenize("\n\n12.3"), - [{float, {1, 3, 7}, 12.3}, {float, {1, 9, 13}, 23.4}] = tokenize(" 12.3 23.4 "), + [{float, {1, {1, 5}, nil}, 12.3}] = tokenize("12.3"), + [{float, {1, {1, 5}, nil}, 12.3}, {';', {1, {5, 6}, nil}}] = tokenize("12.3;"), + [{eol, {1, {1, 2}, nil}}, {float, {3, {1, 5}, nil}, 12.3}] = tokenize("\n\n12.3"), + [{float, {1, {3, 7}, nil}, 12.3}, {float, {1, {9, 13}, nil}, 23.4}] = tokenize(" 12.3 23.4 "), OversizedFloat = string:copies("9", 310) ++ ".0", {1, "invalid float number ", OversizedFloat} = tokenize_error(OversizedFloat). 
comments_test() -> - [{decimal, {1, 1, 2}, 1}, {eol, {1, 3, 4}}, {decimal, {2, 1, 2}, 2}] = tokenize("1 # Comment\n2"), - [{decimal, {1, 1, 2}, 1}, {comment, {1, 3, 12}, "# Comment"}, - {eol, {1, 12, 13}}, {decimal, {2, 1, 2}, 2}] = tokenize("1 # Comment\n2", [{preserve_comments, true}]), - [{comment, {1, 1, 10}, "# Comment"}] = tokenize("# Comment", [{preserve_comments, true}]). + [{decimal, {1, {1, 2}, nil}, 1}, + {eol, {1, {3, 4}, nil}}, + {decimal, {2, {1, 2}, nil}, 2}] = tokenize("1 # Comment\n2"), + [{decimal, {1, {1, 2}, nil}, 1}, + {comment, {1, {3, 12}, nil}, "# Comment"}, + {eol, {1, {12, 13}, nil}}, + {decimal, {2, {1, 2}, nil}, 2}] = tokenize("1 # Comment\n2", [{preserve_comments, true}]), + [{comment, {1, {1, 10}, nil}, "# Comment"}] = tokenize("# Comment", [{preserve_comments, true}]). identifier_test() -> - [{identifier, {1, 1, 4}, abc}] = tokenize("abc "), - [{identifier, {1, 1, 5}, 'abc?'}] = tokenize("abc?"), - [{identifier, {1, 1, 5}, 'abc!'}] = tokenize("abc!"), - [{identifier, {1, 1, 5}, 'a0c!'}] = tokenize("a0c!"), - [{paren_identifier, {1, 1, 4}, 'a0c'}, {'(', {1, 4, 5}}, {')', {1, 5, 6}}] = tokenize("a0c()"), - [{paren_identifier, {1, 1, 5}, 'a0c!'}, {'(', {1, 5, 6}}, {')', {1, 6, 7}}] = tokenize("a0c!()"). + [{identifier, {1, {1, 4}, nil}, abc}] = tokenize("abc "), + [{identifier, {1, {1, 5}, nil}, 'abc?'}] = tokenize("abc?"), + [{identifier, {1, {1, 5}, nil}, 'abc!'}] = tokenize("abc!"), + [{identifier, {1, {1, 5}, nil}, 'a0c!'}] = tokenize("a0c!"), + [{paren_identifier, {1, {1, 4}, nil}, 'a0c'}, {'(', {1, {4, 5}, nil}}, {')', {1, {5, 6}, nil}}] = tokenize("a0c()"), + [{paren_identifier, {1, {1, 5}, nil}, 'a0c!'}, {'(', {1, {5, 6}, nil}}, {')', {1, {6, 7}, nil}}] = tokenize("a0c!()"). module_macro_test() -> - [{identifier, {1, 1, 11}, '__MODULE__'}] = tokenize("__MODULE__"). + [{identifier, {1, {1, 11}, nil}, '__MODULE__'}] = tokenize("__MODULE__"). 
triple_dot_test() -> - [{identifier, {1, 1, 4}, '...'}] = tokenize("..."), - [{'.', {1, 1, 2}}, {identifier, {1, 3, 5}, '..'}] = tokenize(". .."). + [{identifier, {1, {1, 4}, nil}, '...'}] = tokenize("..."), + [{'.', {1, {1, 2}, nil}}, {identifier, {1, {3, 5}, nil}, '..'}] = tokenize(". .."). dot_test() -> - [{identifier, {1, 1, 4}, foo}, - {'.', {1, 4, 5}}, - {identifier, {1, 5, 8}, bar}, - {'.', {1, 8, 9}}, - {identifier, {1, 9, 12}, baz}] = tokenize("foo.bar.baz"). + [{identifier, {1, {1, 4}, nil}, foo}, + {'.', {1, {4, 5}, nil}}, + {identifier, {1, {5, 8}, nil}, bar}, + {'.', {1, {8, 9}, nil}}, + {identifier, {1, {9, 12}, nil}, baz}] = tokenize("foo.bar.baz"). dot_keyword_test() -> - [{identifier, {1, 1, 4}, foo}, - {'.', {1, 4, 5}}, - {identifier, {1, 5, 7}, do}] = tokenize("foo.do"). + [{identifier, {1, {1, 4}, nil}, foo}, + {'.', {1, {4, 5}, nil}}, + {identifier, {1, {5, 7}, nil}, do}] = tokenize("foo.do"). newline_test() -> - [{identifier, {1, 1, 4}, foo}, - {'.', {2, 1, 2}}, - {identifier, {2, 2, 5}, bar}] = tokenize("foo\n.bar"), - [{decimal, {1, 1, 2}, 1}, - {two_op, {2, 1, 3}, '++'}, - {decimal, {2, 3, 4}, 2}] = tokenize("1\n++2"). + [{identifier, {1, {1, 4}, nil}, foo}, + {'.', {2, {1, 2}, nil}}, + {identifier, {2, {2, 5}, nil}, bar}] = tokenize("foo\n.bar"), + [{decimal, {1, {1, 2}, nil}, 1}, + {two_op, {2, {1, 3}, nil}, '++'}, + {decimal, {2, {3, 4}, nil}, 2}] = tokenize("1\n++2"). dot_newline_operator_test() -> - [{identifier, {1, 1, 4}, foo}, - {'.', {1, 4, 5}}, - {identifier, {2, 1, 2}, '+'}, - {decimal, {2, 2, 3}, 1}] = tokenize("foo.\n+1"), - [{identifier, {1, 1, 4}, foo}, - {'.', {1, 4, 5}}, - {identifier, {2, 1, 2}, '+'}, - {decimal, {2, 2, 3}, 1}] = tokenize("foo.#bar\n+1"), - [{identifier, {1, 1, 4}, foo}, - {'.', {1, 4, 5}}, - {comment, {1, 5, 9}, "#bar"}, - {identifier, {2, 1, 2}, '+'}, - {decimal, {2, 2, 3}, 1}] = tokenize("foo.#bar\n+1", [{preserve_comments, true}]). 
+ [{identifier, {1, {1, 4}, nil}, foo}, + {'.', {1, {4, 5}, nil}}, + {identifier, {2, {1, 2}, nil}, '+'}, + {decimal, {2, {2, 3}, nil}, 1}] = tokenize("foo.\n+1"), + [{identifier, {1, {1, 4}, nil}, foo}, + {'.', {1, {4, 5}, nil}}, + {identifier, {2, {1, 2}, nil}, '+'}, + {decimal, {2, {2, 3}, nil}, 1}] = tokenize("foo.#bar\n+1"), + [{identifier, {1, {1, 4}, nil}, foo}, + {'.', {1, {4, 5}, nil}}, + {comment, {1, {5, 9}, nil}, "#bar"}, + {identifier, {2, {1, 2}, nil}, '+'}, + {decimal, {2, {2, 3}, nil}, 1}] = tokenize("foo.#bar\n+1", [{preserve_comments, true}]). aliases_test() -> - [{'aliases', {1, 1, 4}, ['Foo']}] = tokenize("Foo"), - [{'aliases', {1, 1, 4}, ['Foo']}, - {'.', {1, 4, 5}}, - {'aliases', {1, 5, 8}, ['Bar']}, - {'.', {1, 8, 9}}, - {'aliases', {1, 9, 12}, ['Baz']}] = tokenize("Foo.Bar.Baz"). + [{'aliases', {1, {1, 4}, nil}, ['Foo']}] = tokenize("Foo"), + [{'aliases', {1, {1, 4}, nil}, ['Foo']}, + {'.', {1, {4, 5}, nil}}, + {'aliases', {1, {5, 8}, nil}, ['Bar']}, + {'.', {1, {8, 9}, nil}}, + {'aliases', {1, {9, 12}, nil}, ['Baz']}] = tokenize("Foo.Bar.Baz"). string_test() -> - [{bin_string, {1, 1, 6}, [<<"foo">>]}] = tokenize("\"foo\""), - [{bin_string, {1, 1, 6}, [<<"f\"">>]}] = tokenize("\"f\\\"\""), - [{list_string, {1, 1, 6}, [<<"foo">>]}] = tokenize("'foo'"). + [{bin_string, {1, {1, 6}, nil}, [<<"foo">>]}] = tokenize("\"foo\""), + [{bin_string, {1, {1, 6}, nil}, [<<"f\"">>]}] = tokenize("\"f\\\"\""), + [{list_string, {1, {1, 6}, nil}, [<<"foo">>]}] = tokenize("'foo'"). empty_string_test() -> - [{bin_string, {1, 1, 3}, [<<>>]}] = tokenize("\"\""), - [{list_string, {1, 1, 3}, [<<>>]}] = tokenize("''"). + [{bin_string, {1, {1, 3}, nil}, [<<>>]}] = tokenize("\"\""), + [{list_string, {1, {1, 3}, nil}, [<<>>]}] = tokenize("''"). addadd_test() -> - [{identifier, {1, 1, 2}, x}, {two_op, {1, 3, 5}, '++'}, {identifier, {1, 6, 7}, y}] = tokenize("x ++ y"). 
+ [{identifier, {1, {1, 2}, nil}, x}, + {two_op, {1, {3, 5}, nil}, '++'}, + {identifier, {1, {6, 7}, nil}, y}] = tokenize("x ++ y"). space_test() -> - [{op_identifier, {1, 1, 4}, foo}, {dual_op, {1, 5, 6}, '-'}, {decimal, {1, 6, 7}, 2}] = tokenize("foo -2"), - [{op_identifier, {1, 1, 4}, foo}, {dual_op, {1, 6, 7}, '-'}, {decimal, {1, 7, 8}, 2}] = tokenize("foo -2"). + [{op_identifier, {1, {1, 4}, nil}, foo}, + {dual_op, {1, {5, 6}, nil}, '-'}, + {decimal, {1, {6, 7}, nil}, 2}] = tokenize("foo -2"), + [{op_identifier, {1, {1, 4}, nil}, foo}, + {dual_op, {1, {6, 7}, nil}, '-'}, + {decimal, {1, {7, 8}, nil}, 2}] = tokenize("foo -2"). chars_test() -> - [{char, {1, 1, 3}, 97}] = tokenize("?a"), - [{char, {1, 1, 3}, 99}] = tokenize("?c"), - [{char, {1, 1, 4}, 0}] = tokenize("?\\0"), - [{char, {1, 1, 4}, 7}] = tokenize("?\\a"), - [{char, {1, 1, 4}, 10}] = tokenize("?\\n"), - [{char, {1, 1, 4}, 92}] = tokenize("?\\\\"). + [{char, {1, {1, 3}, nil}, 97}] = tokenize("?a"), + [{char, {1, {1, 3}, nil}, 99}] = tokenize("?c"), + [{char, {1, {1, 4}, nil}, 0}] = tokenize("?\\0"), + [{char, {1, {1, 4}, nil}, 7}] = tokenize("?\\a"), + [{char, {1, {1, 4}, nil}, 10}] = tokenize("?\\n"), + [{char, {1, {1, 4}, nil}, 92}] = tokenize("?\\\\"). interpolation_test() -> - [{bin_string, {1, 1, 9}, [<<"f">>, - {{1, 3, 8}, [{identifier, {1, 5, 7}, oo}]}]}, - {two_op, {1, 10, 12}, '<>'}, {bin_string, {1, 13, 15}, - [<<>>]}] = tokenize("\"f#{oo}\" <> \"\""). + [{bin_string, {1, {1, 9}, nil}, [<<"f">>, {{1, {3, 8}, nil}, [{identifier, {1, {5, 7}, nil}, oo}]}]}, + {two_op, {1, {10, 12}, nil}, '<>'}, + {bin_string, {1, {13, 15}, nil}, [<<>>]}] = tokenize("\"f#{oo}\" <> \"\""). 
capture_test() -> - [{capture_op, {1, 1, 2}, '&'}, - {identifier, {1, 2, 4}, '||'}, - {mult_op, {1, 4, 5}, '/'}, - {decimal, {1, 5, 6}, 2}] = tokenize("&||/2"), - [{capture_op, {1, 1, 2}, '&'}, - {identifier, {1, 2, 4}, 'or'}, - {mult_op, {1, 4, 5}, '/'}, - {decimal, {1, 5, 6}, 2}] = tokenize("&or/2"), - [{capture_op,{1,1,2},'&'}, - {unary_op,{1,2,5},'not'}, - {decimal,{1,6,7},1}, - {',',{1,7,8}}, - {decimal,{1,9,10},2}] = tokenize("¬ 1, 2"). + [{capture_op, {1, {1, 2}, nil}, '&'}, + {identifier, {1, {2, 4}, nil}, '||'}, + {mult_op, {1, {4, 5}, nil}, '/'}, + {decimal, {1, {5, 6}, nil}, 2}] = tokenize("&||/2"), + [{capture_op, {1, {1, 2}, nil}, '&'}, + {identifier, {1, {2, 4}, nil}, 'or'}, + {mult_op, {1, {4, 5}, nil}, '/'}, + {decimal, {1, {5, 6}, nil}, 2}] = tokenize("&or/2"), + [{capture_op, {1, {1, 2}, nil}, '&'}, + {unary_op, {1, {2, 5}, nil}, 'not'}, + {decimal, {1, {6, 7}, nil}, 1}, + {',', {1, {7, 8}, nil}}, + {decimal, {1, {9, 10}, nil}, 2}] = tokenize("¬ 1, 2"). vc_merge_conflict_test() -> {1, "found an unexpected version control marker, please resolve the conflicts: ", "<<<<<<< HEAD"} = tokenize_error("<<<<<<< HEAD\n[1, 2, 3]"). sigil_terminator_test() -> - [{sigil, {1, 1, 8}, 114, [<<"foo">>], [], "/"}] = tokenize("~r/foo/"), - [{sigil, {1, 1, 8}, 114, [<<"foo">>], [], "["}] = tokenize("~r[foo]"), - [{sigil, {1, 1, 8}, 114, [<<"foo">>], [], "\""}] = tokenize("~r\"foo\""), - [{sigil, {1, 1, 1}, 83, [<<"sigil heredoc\n">>], [], "\"\"\""}] = tokenize("~S\"\"\"\nsigil heredoc\n\"\"\""), - [{sigil, {1, 1, 1}, 83, [<<"sigil heredoc\n">>], [], "'''"}] = tokenize("~S'''\nsigil heredoc\n'''"). 
+ [{sigil, {1, {1, 8}, nil}, 114, [<<"foo">>], [], "/"}] = tokenize("~r/foo/"), + [{sigil, {1, {1, 8}, nil}, 114, [<<"foo">>], [], "["}] = tokenize("~r[foo]"), + [{sigil, {1, {1, 8}, nil}, 114, [<<"foo">>], [], "\""}] = tokenize("~r\"foo\""), + [{sigil, {1, {1, 1}, nil}, 83, [<<"sigil heredoc\n">>], [], "\"\"\""}] = tokenize("~S\"\"\"\nsigil heredoc\n\"\"\""), + [{sigil, {1, {1, 1}, nil}, 83, [<<"sigil heredoc\n">>], [], "'''"}] = tokenize("~S'''\nsigil heredoc\n'''"). invalid_sigil_delimiter_test() -> {1, "invalid sigil delimiter: ", Message} = tokenize_error("~s\\"), |