author    Andrea Leopardi <an.leopardi@gmail.com>  2017-08-22 16:26:54 +0200
committer Andrea Leopardi <an.leopardi@gmail.com>  2017-08-22 16:26:54 +0200
commit    9fa1d609633b5e4e8b0fc51712822c291cee55fc (patch)
tree      a2ffc8368d9cbd94f38773f4dcc07c884ffef6da
parent    dbaf38093c40d1b55ed6bdc4d7e5d0ac06e92d17 (diff)
Change the format of tokens (branch: al/meta-for-tokens)

Right now, tokens are {Token, Location} or {Token, Location, Value}. This
commit changes Location from {Line, StartColumn, EndColumn} to
{Line, {StartColumn, EndColumn}, Meta}, where Meta can be anything. This
will be used for things such as storing the format of integers.
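
For example, tokenizing "123" (taken from tokenizer_test.erl in this diff):

    %% before this commit: Location is {Line, StartColumn, EndColumn}
    [{decimal, {1, 1, 4}, 123}] = tokenize("123"),

    %% after this commit: Location is {Line, {StartColumn, EndColumn}, Meta},
    %% with Meta set to nil everywhere for now
    [{decimal, {1, {1, 4}, nil}, 123}] = tokenize("123"),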
-rw-r--r--  lib/elixir/src/elixir_interpolation.erl      4
-rw-r--r--  lib/elixir/src/elixir_parser.yrl             2
-rw-r--r--  lib/elixir/src/elixir_tokenizer.erl        179
-rw-r--r--  lib/elixir/test/erlang/string_test.erl      22
-rw-r--r--  lib/elixir/test/erlang/tokenizer_test.erl  261
5 files changed, 258 insertions(+), 210 deletions(-)
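
The commented-out integer clauses added to elixir_tokenizer.erl below sketch
where the Meta slot is headed: the parsed value moves into Meta while the token
carries the original representation. A hypothetical result under that scheme
(the representation string is an assumption; these clauses are not active in
this commit):

    %% hypothetical: what tokenize("0xFF") might yield once the
    %% commented-out clauses replace the current {hex, ...} clause
    [{integer, {1, {1, 5}, 255}, "0xFF"}]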
diff --git a/lib/elixir/src/elixir_interpolation.erl b/lib/elixir/src/elixir_interpolation.erl
index 099dabe66..a1f331235 100644
--- a/lib/elixir/src/elixir_interpolation.erl
+++ b/lib/elixir/src/elixir_interpolation.erl
@@ -45,7 +45,7 @@ extract(Line, Column, Scope, true, [$\\, $#, ${ | Rest], Buffer, Output, Last) -
extract(Line, Column, Scope, true, [$#, ${ | Rest], Buffer, Output, Last) ->
Output1 = build_string(Line, Buffer, Output),
case elixir_tokenizer:tokenize(Rest, Line, Column + 2, Scope) of
- {error, {{EndLine, _, EndColumn}, _, "}"}, [$} | NewRest], Tokens} ->
+ {error, {{EndLine, {_, EndColumn}, _}, _, "}"}, [$} | NewRest], Tokens} ->
Output2 = build_interpol(Line, Column, EndColumn, Tokens, Output1),
extract(EndLine, EndColumn, Scope, true, NewRest, [], Output2, Last);
{error, Reason, _, _} ->
@@ -211,4 +211,4 @@ build_string(_Line, Buffer, Output) ->
[elixir_utils:characters_to_binary(lists:reverse(Buffer)) | Output].
build_interpol(Line, Column, EndColumn, Buffer, Output) ->
- [{{Line, Column, EndColumn}, lists:reverse(Buffer)} | Output].
+ [{{Line, {Column, EndColumn}, nil}, lists:reverse(Buffer)} | Output].
diff --git a/lib/elixir/src/elixir_parser.yrl b/lib/elixir/src/elixir_parser.yrl
index 151668299..bc3fb28e4 100644
--- a/lib/elixir/src/elixir_parser.yrl
+++ b/lib/elixir/src/elixir_parser.yrl
@@ -623,7 +623,7 @@ Erlang code.
meta_from_token(Token, Counter) -> [{counter, Counter} | meta_from_token(Token)].
meta_from_token(Token) -> meta_from_location(?location(Token)).
-meta_from_location({Line, Column, EndColumn})
+meta_from_location({Line, {Column, EndColumn}, _})
when is_integer(Line), is_integer(Column), is_integer(EndColumn) -> [{line, Line}].
%% Handle metadata in literals
diff --git a/lib/elixir/src/elixir_tokenizer.erl b/lib/elixir/src/elixir_tokenizer.erl
index 99f42836e..89f2eea9e 100644
--- a/lib/elixir/src/elixir_tokenizer.erl
+++ b/lib/elixir/src/elixir_tokenizer.erl
@@ -151,7 +151,7 @@ tokenize(String, Line, Opts) ->
tokenize([], Line, Column, #elixir_tokenizer{terminators=[]}, Tokens) ->
{ok, Line, Column, lists:reverse(Tokens)};
-tokenize([], EndLine, _Column, #elixir_tokenizer{terminators=[{Start, {StartLine, _, _}} | _]}, Tokens) ->
+tokenize([], EndLine, _Column, #elixir_tokenizer{terminators=[{Start, {StartLine, {_, _}, _}} | _]}, Tokens) ->
End = terminator(Start),
Message = io_lib:format("missing terminator: ~ts (for \"~ts\" starting at line ~B)", [End, Start, StartLine]),
{error, {EndLine, Message, []}, [], Tokens};
@@ -164,17 +164,32 @@ tokenize(("<<<<<<<" ++ _) = Original, Line, 1, _Scope, Tokens) ->
% Base integers
+% tokenize([$0, $x, H | T], Line, Column, Scope, Tokens) when ?is_hex(H) ->
+% {Rest, Number, OriginalRepresentation, Length} = tokenize_hex(T, [H], 1),
+% Token = {integer, {Line, {Column, Column + 2 + Length}, Number}, OriginalRepresentation},
+% tokenize(Rest, Line, Column + 2 + Length, Scope, [Token | Tokens]);
+%
+% tokenize([$0, $b, H | T], Line, Column, Scope, Tokens) when ?is_bin(H) ->
+% {Rest, Number, OriginalRepresentation, Length} = tokenize_bin(T, [H], 1),
+% Token = {integer, {Line, {Column, Column + 2 + Length}, Number}, OriginalRepresentation},
+% tokenize(Rest, Line, Column + 2 + Length, Scope, [Token | Tokens]);
+%
+% tokenize([$0, $o, H | T], Line, Column, Scope, Tokens) when ?is_octal(H) ->
+% {Rest, Number, OriginalRepresentation, Length} = tokenize_octal(T, [H], 1),
+% Token = {integer, {Line, {Column, Column + 2 + Length}, Number}, OriginalRepresentation},
+% tokenize(Rest, Line, Column + 2 + Length, Scope, [Token | Tokens]);
+
tokenize([$0, $x, H | T], Line, Column, Scope, Tokens) when ?is_hex(H) ->
{Rest, Number, Length} = tokenize_hex(T, [H], 1),
- tokenize(Rest, Line, Column + 2 + Length, Scope, [{hex, {Line, Column, Column + 2 + Length}, Number} | Tokens]);
+ tokenize(Rest, Line, Column + 2 + Length, Scope, [{hex, {Line, {Column, Column + 2 + Length}, nil}, Number} | Tokens]);
tokenize([$0, $b, H | T], Line, Column, Scope, Tokens) when ?is_bin(H) ->
{Rest, Number, Length} = tokenize_bin(T, [H], 1),
- tokenize(Rest, Line, Column + 2 + Length, Scope, [{binary, {Line, Column, Column + 2 + Length}, Number} | Tokens]);
+ tokenize(Rest, Line, Column + 2 + Length, Scope, [{binary, {Line, {Column, Column + 2 + Length}, nil}, Number} | Tokens]);
tokenize([$0, $o, H | T], Line, Column, Scope, Tokens) when ?is_octal(H) ->
{Rest, Number, Length} = tokenize_octal(T, [H], 1),
- tokenize(Rest, Line, Column + 2 + Length, Scope, [{octal, {Line, Column, Column + 2 + Length}, Number} | Tokens]);
+ tokenize(Rest, Line, Column + 2 + Length, Scope, [{octal, {Line, {Column, Column + 2 + Length}, nil}, Number} | Tokens]);
% Comments
@@ -182,7 +197,7 @@ tokenize([$# | String], Line, Column, Scope, Tokens) ->
{Rest, Comment, Length} = tokenize_comment(String, [$#], 1),
case Scope#elixir_tokenizer.preserve_comments of
true ->
- CommentToken = {comment, {Line, Column, Column + Length}, Comment},
+ CommentToken = {comment, {Line, {Column, Column + Length}, nil}, Comment},
tokenize(Rest, Line, Column + Length, Scope, [CommentToken | Tokens]);
false ->
tokenize(Rest, Line, Column, Scope, Tokens)
@@ -194,7 +209,8 @@ tokenize([$~, S, H, H, H | T] = Original, Line, Column, Scope, Tokens) when ?is_
case extract_heredoc_with_interpolation(Line, Column, Scope, ?is_downcase(S), T, H) of
{ok, NewLine, NewColumn, Parts, Rest} ->
{Final, Modifiers} = collect_modifiers(Rest, []),
- tokenize(Final, NewLine, NewColumn, Scope, [{sigil, {Line, Column, NewColumn}, S, Parts, Modifiers, [H, H, H]} | Tokens]);
+ Token = {sigil, {Line, {Column, NewColumn}, nil}, S, Parts, Modifiers, [H, H, H]},
+ tokenize(Final, NewLine, NewColumn, Scope, [Token | Tokens]);
{error, Reason} ->
{error, Reason, Original, Tokens}
end;
@@ -203,7 +219,8 @@ tokenize([$~, S, H | T] = Original, Line, Column, Scope, Tokens) when ?is_sigil(
case elixir_interpolation:extract(Line, Column + 3, Scope, ?is_downcase(S), T, sigil_terminator(H)) of
{NewLine, NewColumn, Parts, Rest} ->
{Final, Modifiers} = collect_modifiers(Rest, []),
- tokenize(Final, NewLine, NewColumn, Scope, [{sigil, {Line, Column, NewColumn}, S, Parts, Modifiers, [H]} | Tokens]);
+ Token = {sigil, {Line, {Column, NewColumn}, nil}, S, Parts, Modifiers, [H]},
+ tokenize(Final, NewLine, NewColumn, Scope, [Token | Tokens]);
{error, Reason} ->
Sigil = [$~, S, H],
interpolation_error(Reason, Original, Tokens, " (for sigil ~ts starting at line ~B)", [Sigil, Line])
@@ -226,7 +243,8 @@ tokenize([$~, S, H | _] = Original, Line, Column, _Scope, Tokens) when ?is_upcas
tokenize([$?, $\\, H | T], Line, Column, Scope, Tokens) ->
Char = elixir_interpolation:unescape_map(H),
- tokenize(T, Line, Column + 3, Scope, [{char, {Line, Column, Column + 3}, Char} | Tokens]);
+ Token = {char, {Line, {Column, Column + 3}, nil}, Char},
+ tokenize(T, Line, Column + 3, Scope, [Token | Tokens]);
tokenize([$?, Char | T], Line, Column, Scope, Tokens) ->
case handle_char(Char) of
@@ -237,7 +255,8 @@ tokenize([$?, Char | T], Line, Column, Scope, Tokens) ->
false ->
ok
end,
- tokenize(T, Line, Column + 2, Scope, [{char, {Line, Column, Column + 2}, Char} | Tokens]);
+ Token = {char, {Line, {Column, Column + 2}, nil}, Char},
+ tokenize(T, Line, Column + 2, Scope, [Token | Tokens]);
% Heredocs
@@ -257,45 +276,48 @@ tokenize([$' | T], Line, Column, Scope, Tokens) ->
% Operator atoms
tokenize("...:" ++ Rest, Line, Column, Scope, Tokens) when ?is_space(hd(Rest)) ->
- tokenize(Rest, Line, Column + 4, Scope, [{kw_identifier, {Line, Column, Column + 4}, '...'} | Tokens]);
+ tokenize(Rest, Line, Column + 4, Scope, [{kw_identifier, {Line, {Column, Column + 4}, nil}, '...'} | Tokens]);
tokenize("<<>>:" ++ Rest, Line, Column, Scope, Tokens) when ?is_space(hd(Rest)) ->
- tokenize(Rest, Line, Column + 5, Scope, [{kw_identifier, {Line, Column, Column + 5}, '<<>>'} | Tokens]);
+ tokenize(Rest, Line, Column + 5, Scope, [{kw_identifier, {Line, {Column, Column + 5}, nil}, '<<>>'} | Tokens]);
tokenize("%{}:" ++ Rest, Line, Column, Scope, Tokens) when ?is_space(hd(Rest)) ->
- tokenize(Rest, Line, Column + 4, Scope, [{kw_identifier, {Line, Column, Column + 4}, '%{}'} | Tokens]);
+ tokenize(Rest, Line, Column + 4, Scope, [{kw_identifier, {Line, {Column, Column + 4}, nil}, '%{}'} | Tokens]);
tokenize("%:" ++ Rest, Line, Column, Scope, Tokens) when ?is_space(hd(Rest)) ->
- tokenize(Rest, Line, Column + 2, Scope, [{kw_identifier, {Line, Column, Column + 2}, '%'} | Tokens]);
+ tokenize(Rest, Line, Column + 2, Scope, [{kw_identifier, {Line, {Column, Column + 2}, nil}, '%'} | Tokens]);
tokenize("{}:" ++ Rest, Line, Column, Scope, Tokens) when ?is_space(hd(Rest)) ->
- tokenize(Rest, Line, Column + 3, Scope, [{kw_identifier, {Line, Column, Column + 3}, '{}'} | Tokens]);
+ tokenize(Rest, Line, Column + 3, Scope, [{kw_identifier, {Line, {Column, Column + 3}, nil}, '{}'} | Tokens]);
tokenize(":..." ++ Rest, Line, Column, Scope, Tokens) ->
- tokenize(Rest, Line, Column + 4, Scope, [{atom, {Line, Column, Column + 4}, '...'} | Tokens]);
+ tokenize(Rest, Line, Column + 4, Scope, [{atom, {Line, {Column, Column + 4}, nil}, '...'} | Tokens]);
tokenize(":<<>>" ++ Rest, Line, Column, Scope, Tokens) ->
- tokenize(Rest, Line, Column + 5, Scope, [{atom, {Line, Column, Column + 5}, '<<>>'} | Tokens]);
+ tokenize(Rest, Line, Column + 5, Scope, [{atom, {Line, {Column, Column + 5}, nil}, '<<>>'} | Tokens]);
tokenize(":%{}" ++ Rest, Line, Column, Scope, Tokens) ->
- tokenize(Rest, Line, Column + 4, Scope, [{atom, {Line, Column, Column + 4}, '%{}'} | Tokens]);
+ tokenize(Rest, Line, Column + 4, Scope, [{atom, {Line, {Column, Column + 4}, nil}, '%{}'} | Tokens]);
tokenize(":%" ++ Rest, Line, Column, Scope, Tokens) ->
- tokenize(Rest, Line, Column + 2, Scope, [{atom, {Line, Column, Column + 2}, '%'} | Tokens]);
+ tokenize(Rest, Line, Column + 2, Scope, [{atom, {Line, {Column, Column + 2}, nil}, '%'} | Tokens]);
tokenize(":{}" ++ Rest, Line, Column, Scope, Tokens) ->
- tokenize(Rest, Line, Column + 3, Scope, [{atom, {Line, Column, Column + 3}, '{}'} | Tokens]);
+ tokenize(Rest, Line, Column + 3, Scope, [{atom, {Line, {Column, Column + 3}, nil}, '{}'} | Tokens]);
% ## Three Token Operators
tokenize([$:, T1, T2, T3 | Rest], Line, Column, Scope, Tokens) when
?unary_op3(T1, T2, T3); ?comp_op3(T1, T2, T3); ?and_op3(T1, T2, T3); ?or_op3(T1, T2, T3);
?arrow_op3(T1, T2, T3); ?three_op(T1, T2, T3) ->
- tokenize(Rest, Line, Column + 4, Scope, [{atom, {Line, Column, Column + 4}, list_to_atom([T1, T2, T3])} | Tokens]);
+ Token = {atom, {Line, {Column, Column + 4}, nil}, list_to_atom([T1, T2, T3])},
+ tokenize(Rest, Line, Column + 4, Scope, [Token | Tokens]);
% ## Two Token Operators
tokenize([$:, T1, T2 | Rest], Line, Column, Scope, Tokens) when
?comp_op2(T1, T2); ?rel_op2(T1, T2); ?and_op(T1, T2); ?or_op(T1, T2);
?arrow_op(T1, T2); ?in_match_op(T1, T2); ?two_op(T1, T2); ?stab_op(T1, T2);
?type_op(T1, T2) ->
- tokenize(Rest, Line, Column + 3, Scope, [{atom, {Line, Column, Column + 3}, list_to_atom([T1, T2])} | Tokens]);
+ Token = {atom, {Line, {Column, Column + 3}, nil}, list_to_atom([T1, T2])},
+ tokenize(Rest, Line, Column + 3, Scope, [Token | Tokens]);
% ## Single Token Operators
tokenize([$:, T | Rest], Line, Column, Scope, Tokens) when
?at_op(T); ?unary_op(T); ?capture_op(T); ?dual_op(T); ?mult_op(T);
?rel_op(T); ?match_op(T); ?pipe_op(T); T == $. ->
- tokenize(Rest, Line, Column + 2, Scope, [{atom, {Line, Column, Column + 2}, list_to_atom([T])} | Tokens]);
+ Token = {atom, {Line, {Column, Column + 2}, nil}, list_to_atom([T])},
+ tokenize(Rest, Line, Column + 2, Scope, [Token | Tokens]);
% Stand-alone tokens
@@ -304,7 +326,7 @@ tokenize("..." ++ Rest, Line, Column, Scope, Tokens) ->
tokenize(Rest, Line, Column + 3, Scope, [Token | Tokens]);
tokenize("=>" ++ Rest, Line, Column, Scope, Tokens) ->
- tokenize(Rest, Line, Column + 2, Scope, add_token_with_nl({assoc_op, {Line, Column, Column + 2}, '=>'}, Tokens));
+ tokenize(Rest, Line, Column + 2, Scope, add_token_with_nl({assoc_op, {Line, {Column, Column + 2}, nil}, '=>'}, Tokens));
% ## Three token operators
tokenize([T1, T2, T3 | Rest], Line, Column, Scope, Tokens) when ?unary_op3(T1, T2, T3) ->
@@ -327,12 +349,12 @@ tokenize([T1, T2, T3 | Rest], Line, Column, Scope, Tokens) when ?arrow_op3(T1, T
% ## Containers + punctuation tokens
tokenize([T, T | Rest], Line, Column, Scope, Tokens) when T == $<; T == $> ->
- Token = {list_to_atom([T, T]), {Line, Column, Column + 2}},
+ Token = {list_to_atom([T, T]), {Line, {Column, Column + 2}, nil}},
handle_terminator(Rest, Line, Column + 2, Scope, Token, Tokens);
tokenize([T | Rest], Line, Column, Scope, Tokens) when T == $(;
T == ${; T == $}; T == $[; T == $]; T == $); T == $, ->
- Token = {list_to_atom([T]), {Line, Column, Column + 1}},
+ Token = {list_to_atom([T]), {Line, {Column, Column + 1}, nil}},
handle_terminator(Rest, Line, Column + 1, Scope, Token, Tokens);
% ## Two Token Operators
@@ -399,7 +421,8 @@ tokenize([$:, H | T] = Original, Line, Column, Scope, Tokens) when ?is_quote(H)
true -> atom_safe;
false -> atom_unsafe
end,
- tokenize(Rest, NewLine, NewColumn, Scope, [{Key, {Line, Column, NewColumn}, Unescaped} | Tokens]);
+ Token = {Key, {Line, {Column, NewColumn}, nil}, Unescaped},
+ tokenize(Rest, NewLine, NewColumn, Scope, [Token | Tokens]);
{error, Reason} ->
interpolation_error(Reason, Original, Tokens, " (for atom starting at line ~B)", [Line])
end;
@@ -407,7 +430,8 @@ tokenize([$:, H | T] = Original, Line, Column, Scope, Tokens) when ?is_quote(H)
tokenize([$: | String] = Original, Line, Column, Scope, Tokens) ->
case tokenize_identifier(String, Line, Scope) of
{_Kind, Atom, Rest, Length, _Ascii, _Special} ->
- tokenize(Rest, Line, Column + 1 + Length, Scope, [{atom, {Line, Column, Column + 1 + Length}, Atom} | Tokens]);
+ Token = {atom, {Line, {Column, Column + 1 + Length}, nil}, Atom},
+ tokenize(Rest, Line, Column + 1 + Length, Scope, [Token | Tokens]);
empty ->
unexpected_token(Original, Line, Column, Tokens);
{error, Reason} ->
@@ -421,9 +445,11 @@ tokenize([H | T], Line, Column, Scope, Tokens) when ?is_digit(H) ->
{error, Reason, Number} ->
{error, {Line, Reason, Number}, T, Tokens};
{Rest, Number, Length} when is_integer(Number) ->
- tokenize(Rest, Line, Column + Length, Scope, [{decimal, {Line, Column, Column + Length}, Number} | Tokens]);
+ Token = {decimal, {Line, {Column, Column + Length}, nil}, Number},
+ tokenize(Rest, Line, Column + Length, Scope, [Token | Tokens]);
{Rest, Number, Length} ->
- tokenize(Rest, Line, Column + Length, Scope, [{float, {Line, Column, Column + Length}, Number} | Tokens])
+ Token = {float, {Line, {Column, Column + Length}, nil}, Number},
+ tokenize(Rest, Line, Column + Length, Scope, [Token | Tokens])
end;
% Spaces
@@ -435,10 +461,10 @@ tokenize([T | Rest], Line, Column, Scope, Tokens) when ?is_horizontal_space(T) -
% End of line
tokenize(";" ++ Rest, Line, Column, Scope, []) ->
- tokenize(Rest, Line, Column + 1, Scope, [{';', {Line, Column, Column + 1}}]);
+ tokenize(Rest, Line, Column + 1, Scope, [{';', {Line, {Column, Column + 1}, nil}}]);
tokenize(";" ++ Rest, Line, Column, Scope, [Top | _] = Tokens) when element(1, Top) /= ';' ->
- tokenize(Rest, Line, Column + 1, Scope, [{';', {Line, Column, Column + 1}} | Tokens]);
+ tokenize(Rest, Line, Column + 1, Scope, [{';', {Line, {Column, Column + 1}, nil}} | Tokens]);
tokenize("\\" = Original, Line, _Column, _Scope, Tokens) ->
{error, {Line, "invalid escape \\ at end of file", []}, Original, Tokens};
@@ -464,10 +490,10 @@ tokenize("\r\n" ++ Rest, Line, Column, Scope, Tokens) ->
% Others
tokenize([$%, ${ | T], Line, Column, Scope, Tokens) ->
- tokenize([${ | T], Line, Column + 1, Scope, [{'%{}', {Line, Column, Column + 1}} | Tokens]);
+ tokenize([${ | T], Line, Column + 1, Scope, [{'%{}', {Line, {Column, Column + 1}, nil}} | Tokens]);
tokenize([$% | T], Line, Column, Scope, Tokens) ->
- tokenize(T, Line, Column + 1, Scope, [{'%', {Line, Column, Column + 1}} | Tokens]);
+ tokenize(T, Line, Column + 1, Scope, [{'%', {Line, {Column, Column + 1}, nil}} | Tokens]);
tokenize([$. | T], Line, Column, Scope, Tokens) ->
{Rest, Counter, Offset, CommentTokens} = strip_dot_space(T, 0, Column + 1, Line, []),
@@ -482,7 +508,8 @@ tokenize(String, Line, Column, Scope, Tokens) ->
case Rest of
[$: | T] when ?is_space(hd(T)) ->
- tokenize(T, Line, Column + Length + 1, Scope, [{kw_identifier, {Line, Column, Column + Length + 1}, Atom} | Tokens]);
+ Token = {kw_identifier, {Line, {Column, Column + Length + 1}, nil}, Atom},
+ tokenize(T, Line, Column + Length + 1, Scope, [Token | Tokens]);
[$: | T] when hd(T) /= $: ->
AtomName = atom_to_list(Atom) ++ [$:],
Reason = {Line, "keyword argument must be followed by space after: ", AtomName},
@@ -519,7 +546,7 @@ strip_dot_space(T, Counter, Column, StartLine, Tokens) ->
case strip_horizontal_space(T) of
{"#" ++ R, _} ->
{Rest, Comment, Length} = tokenize_comment(R, [$#], 1),
- CommentToken = {comment, {StartLine + Counter, Column, Column + Length}, Comment},
+ CommentToken = {comment, {StartLine + Counter, {Column, Column + Length}, nil}, Comment},
strip_dot_space(Rest, Counter, 1, StartLine, [CommentToken | Tokens]);
{"\r\n" ++ Rest, _} ->
strip_dot_space(Rest, Counter + 1, 1, StartLine, Tokens);
@@ -546,7 +573,7 @@ handle_char(_) -> false.
handle_heredocs(T, Line, Column, H, Scope, Tokens) ->
case extract_heredoc_with_interpolation(Line, Column, Scope, true, T, H) of
{ok, NewLine, NewColumn, Parts, Rest} ->
- Token = {heredoc_type(H), {Line, Column, NewColumn}, unescape_tokens(Parts)},
+ Token = {heredoc_type(H), {Line, {Column, NewColumn}, nil}, unescape_tokens(Parts)},
tokenize(Rest, NewLine, NewColumn, Scope, [Token | Tokens]);
{error, Reason} ->
{error, Reason, [H, H, H] ++ T, Tokens}
@@ -562,37 +589,39 @@ handle_strings(T, Line, Column, H, Scope, Tokens) ->
true -> kw_identifier_safe;
false -> kw_identifier_unsafe
end,
- tokenize(Rest, NewLine, NewColumn, Scope, [{Key, {Line, Column - 1, NewColumn}, Unescaped} | Tokens]);
+ Token = {Key, {Line, {Column - 1, NewColumn}, nil}, Unescaped},
+ tokenize(Rest, NewLine, NewColumn, Scope, [Token | Tokens]);
{NewLine, NewColumn, Parts, Rest} ->
- Token = {string_type(H), {Line, Column - 1, NewColumn}, unescape_tokens(Parts)},
+ Token = {string_type(H), {Line, {Column - 1, NewColumn}, nil}, unescape_tokens(Parts)},
tokenize(Rest, NewLine, NewColumn, Scope, [Token | Tokens])
end.
handle_unary_op([$: | Rest], Line, Column, _Kind, Length, Op, Scope, Tokens) when ?is_space(hd(Rest)) ->
- tokenize(Rest, Line, Column + Length + 1, Scope, [{kw_identifier, {Line, Column, Column + Length}, Op} | Tokens]);
+ Token = {kw_identifier, {Line, {Column, Column + Length}, nil}, Op},
+ tokenize(Rest, Line, Column + Length + 1, Scope, [Token | Tokens]);
handle_unary_op(Rest, Line, Column, Kind, Length, Op, Scope, Tokens) ->
case strip_horizontal_space(Rest) of
{[$/ | _] = Remaining, Extra} ->
- tokenize(Remaining, Line, Column + Length + Extra, Scope,
- [{identifier, {Line, Column, Column + Length}, Op} | Tokens]);
+ Token = {identifier, {Line, {Column, Column + Length}, nil}, Op},
+ tokenize(Remaining, Line, Column + Length + Extra, Scope, [Token | Tokens]);
{Remaining, Extra} ->
- tokenize(Remaining, Line, Column + Length + Extra, Scope,
- [{Kind, {Line, Column, Column + Length}, Op} | Tokens])
+ Token = {Kind, {Line, {Column, Column + Length}, nil}, Op},
+ tokenize(Remaining, Line, Column + Length + Extra, Scope, [Token | Tokens])
end.
handle_op([$: | Rest], Line, Column, _Kind, Length, Op, Scope, Tokens) when ?is_space(hd(Rest)) ->
- tokenize(Rest, Line, Column + Length + 1, Scope,
- [{kw_identifier, {Line, Column, Column + Length}, Op} | Tokens]);
+ Token = {kw_identifier, {Line, {Column, Column + Length}, nil}, Op},
+ tokenize(Rest, Line, Column + Length + 1, Scope, [Token | Tokens]);
handle_op(Rest, Line, Column, Kind, Length, Op, Scope, Tokens) ->
case strip_horizontal_space(Rest) of
{[$/ | _] = Remaining, Extra} ->
- tokenize(Remaining, Line, Column + Length + Extra, Scope,
- [{identifier, {Line, Column, Column + Length}, Op} | Tokens]);
+ Token = {identifier, {Line, {Column, Column + Length}, nil}, Op},
+ tokenize(Remaining, Line, Column + Length + Extra, Scope, [Token | Tokens]);
{Remaining, Extra} ->
- tokenize(Remaining, Line, Column + Length + Extra, Scope,
- add_token_with_nl({Kind, {Line, Column, Column + Length}, Op}, Tokens))
+ Token = {Kind, {Line, {Column, Column + Length}, nil}, Op},
+ tokenize(Remaining, Line, Column + Length + Extra, Scope, add_token_with_nl(Token, Tokens))
end.
handle_comments(CommentTokens, Tokens, Scope) ->
@@ -622,7 +651,7 @@ handle_dot([$., T | Rest], Line, Column, DotColumn, Scope, Tokens, CommentTokens
% ## Exception for .( as it needs to be treated specially in the parser
handle_dot([$., $( | Rest], Line, Column, DotColumn, Scope, Tokens, CommentTokens, Counter) ->
- TokensSoFar = add_token_with_nl({dot_call_op, {Line, DotColumn, DotColumn + 1}, '.'}, Tokens),
+ TokensSoFar = add_token_with_nl({dot_call_op, {Line, {DotColumn, DotColumn + 1}, nil}, '.'}, Tokens),
tokenize([$( | Rest], Line + Counter, Column + 2, Scope, handle_comments(CommentTokens, TokensSoFar, Scope));
handle_dot([$., H | T] = Original, Line, Column, DotColumn, Scope, Tokens, CommentTokens, Counter) when ?is_quote(H) ->
@@ -631,7 +660,7 @@ handle_dot([$., H | T] = Original, Line, Column, DotColumn, Scope, Tokens, Comme
case unsafe_to_atom(Part, Line, Scope) of
{ok, Atom} ->
Token = check_call_identifier(Line + Counter, Column, max(NewColumn - Column, 0), Atom, Rest),
- TokensSoFar = add_token_with_nl({'.', {Line, DotColumn, DotColumn + 1}}, Tokens),
+ TokensSoFar = add_token_with_nl({'.', {Line, {DotColumn, DotColumn + 1}, nil}}, Tokens),
tokenize(Rest, NewLine, NewColumn, Scope, [Token | handle_comments(CommentTokens, TokensSoFar, Scope)]);
{error, Reason} ->
{error, Reason, Original, Tokens}
@@ -641,12 +670,12 @@ handle_dot([$., H | T] = Original, Line, Column, DotColumn, Scope, Tokens, Comme
end;
handle_dot([$. | Rest], Line, Column, DotColumn, Scope, Tokens, CommentTokens, Counter) ->
- TokensSoFar = add_token_with_nl({'.', {Line, DotColumn, DotColumn + 1}}, Tokens),
+ TokensSoFar = add_token_with_nl({'.', {Line, {DotColumn, DotColumn + 1}, nil}}, Tokens),
tokenize(Rest, Line + Counter, Column + 1, Scope, handle_comments(CommentTokens, TokensSoFar, Scope)).
handle_call_identifier(Rest, Line, Column, DotColumn, Length, Op, Scope, Tokens, CommentTokens, Counter) ->
- {_, {NewLine, _, NewColumn}, _} = Token = check_call_identifier(Line + Counter, Column, Length, Op, Rest),
- TokensSoFar = add_token_with_nl({'.', {Line, DotColumn, DotColumn + 1}}, Tokens),
+ {_, {NewLine, {_, NewColumn}, _}, _} = Token = check_call_identifier(Line + Counter, Column, Length, Op, Rest),
+ TokensSoFar = add_token_with_nl({'.', {Line, {DotColumn, DotColumn + 1}, nil}}, Tokens),
tokenize(Rest, NewLine, NewColumn, Scope, [Token | handle_comments(CommentTokens, TokensSoFar, Scope)]).
% ## Ambiguous unary/binary operators tokens
@@ -657,7 +686,8 @@ handle_space_sensitive_tokens([Sign, NotMarker | T], Line, Column, Scope, [{Iden
NotMarker /= $%, NotMarker /= $+, NotMarker /= $-, NotMarker /= $/, NotMarker /= $>, %% operators
Identifier == identifier ->
Rest = [NotMarker | T],
- tokenize(Rest, Line, Column + 1, Scope, [{dual_op, {Line, Column, Column + 1}, list_to_atom([Sign])}, setelement(1, H, op_identifier) | Tokens]);
+ DualOpToken = {dual_op, {Line, {Column, Column + 1}, nil}, list_to_atom([Sign])},
+ tokenize(Rest, Line, Column + 1, Scope, [DualOpToken, setelement(1, H, op_identifier) | Tokens]);
handle_space_sensitive_tokens(String, Line, Column, Scope, Tokens) ->
tokenize(String, Line, Column, Scope, Tokens).
@@ -667,7 +697,7 @@ handle_space_sensitive_tokens(String, Line, Column, Scope, Tokens) ->
eol(_Line, _Column, [{';', _} | _] = Tokens) -> Tokens;
eol(_Line, _Column, [{',', _} | _] = Tokens) -> Tokens;
eol(_Line, _Column, [{eol, _} | _] = Tokens) -> Tokens;
-eol(Line, Column, Tokens) -> [{eol, {Line, Column, Column + 1}} | Tokens].
+eol(Line, Column, Tokens) -> [{eol, {Line, {Column, Column + 1}, nil}} | Tokens].
unsafe_to_atom(Part, Line, #elixir_tokenizer{}) when
is_binary(Part) andalso size(Part) > 255;
@@ -925,16 +955,17 @@ tokenize_alias(Rest, Line, Column, Atom, Length, Ascii, Special, Scope, Tokens)
Reason = {Line, invalid_character_error("alias", hd(Special)), AtomName},
{error, Reason, AtomName ++ Rest, Tokens};
true ->
- tokenize(Rest, Line, Column + Length, Scope, [{aliases, {Line, Column, Column + Length}, [Atom]} | Tokens])
+ AliasesToken = {aliases, {Line, {Column, Column + Length}, nil}, [Atom]},
+ tokenize(Rest, Line, Column + Length, Scope, [AliasesToken | Tokens])
end.
tokenize_other(Rest, Line, Column, Atom, Length, Scope, Tokens) ->
case tokenize_keyword_or_identifier(Rest, Line, Column, Length, Atom, Tokens) of
- {keyword, Rest, {_, {_, _, EndColumn}} = Check, T} ->
+ {keyword, Rest, {_, {_, {_, EndColumn}, _}} = Check, T} ->
handle_terminator(Rest, Line, EndColumn, Scope, Check, T);
- {keyword, Rest, {_, {_, _, EndColumn}, _} = Check, T} ->
+ {keyword, Rest, {_, {_, {_, EndColumn}, _}, _} = Check, T} ->
handle_terminator(Rest, Line, EndColumn, Scope, Check, T);
- {identifier, Rest, {_, {_, _, EndColumn}, _} = Token} ->
+ {identifier, Rest, {_, {_, {_, EndColumn}, _}, _} = Token} ->
tokenize(Rest, Line, EndColumn, Scope, [Token | Tokens]);
{error, _, _, _} = Error ->
Error
@@ -944,8 +975,8 @@ tokenize_keyword_or_identifier(Rest, Line, Column, Length, Atom, Tokens) ->
case check_keyword(Line, Column, Length, Atom, Tokens, Rest) of
nomatch ->
{identifier, Rest, check_call_identifier(Line, Column, Length, Atom, Rest)};
- {ok, [{in_op, {_, _, InEndColumn}, in} | [{unary_op, {NotLine, NotColumn, _}, 'not'} | T]]} ->
- {keyword, Rest, {in_op, {NotLine, NotColumn, InEndColumn}, 'not in'}, T};
+ {ok, [{in_op, {_, {_, InEndColumn}, _}, in} | [{unary_op, {NotLine, {NotColumn, _}, _}, 'not'} | T]]} ->
+ {keyword, Rest, {in_op, {NotLine, {NotColumn, InEndColumn}, nil}, 'not in'}, T};
{ok, [Check | T]} ->
{keyword, Rest, Check, T};
{error, Message, Token} ->
@@ -955,11 +986,11 @@ tokenize_keyword_or_identifier(Rest, Line, Column, Length, Atom, Tokens) ->
%% Check if it is a call identifier (paren | bracket | do)
check_call_identifier(Line, Column, Length, Atom, [$( | _]) ->
- {paren_identifier, {Line, Column, Column + Length}, Atom};
+ {paren_identifier, {Line, {Column, Column + Length}, nil}, Atom};
check_call_identifier(Line, Column, Length, Atom, [$[ | _]) ->
- {bracket_identifier, {Line, Column, Column + Length}, Atom};
+ {bracket_identifier, {Line, {Column, Column + Length}, nil}, Atom};
check_call_identifier(Line, Column, Length, Atom, _Rest) ->
- {identifier, {Line, Column, Column + Length}, Atom}.
+ {identifier, {Line, {Column, Column + Length}, nil}, Atom}.
add_token_with_nl({unary_op, _, _} = Left, T) -> [Left | T];
add_token_with_nl(Left, [{eol, _} | T]) -> [Left | T];
@@ -1011,7 +1042,7 @@ check_terminator({E, _}, [{S, _} | Terminators]) when
S == '<<', E == '>>' ->
Terminators;
-check_terminator({E, {Line, _, _}}, [{Start, {StartLine, _, _}} | _]) when
+check_terminator({E, {Line, _, _}}, [{Start, {StartLine, {_, _}, _}} | _]) when
E == 'end'; E == ')'; E == ']'; E == '}'; E == '>>' ->
End = terminator(Start),
MessagePrefix = io_lib:format("\"~ts\" is missing terminator \"~ts\". unexpected token: \"",
@@ -1050,14 +1081,14 @@ terminator('<<') -> '>>'.
check_keyword(_Line, _Column, _Length, _Atom, [{'.', _} | _], _Rest) ->
nomatch;
check_keyword(DoLine, DoColumn, _Length, do,
- [{Identifier, {Line, Column, EndColumn}, Atom} | T], _Rest) when Identifier == identifier ->
- {ok, add_token_with_nl({do, {DoLine, DoColumn, DoColumn + 2}},
- [{do_identifier, {Line, Column, EndColumn}, Atom} | T])};
+ [{Identifier, {Line, {Column, EndColumn}, Meta}, Atom} | T], _Rest) when Identifier == identifier ->
+ {ok, add_token_with_nl({do, {DoLine, {DoColumn, DoColumn + 2}, nil}},
+ [{do_identifier, {Line, {Column, EndColumn}, Meta}, Atom} | T])};
check_keyword(_Line, _Column, _Length, do, [{'fn', _} | _], _Rest) ->
{error, do_with_fn_error("unexpected token \"do\""), "do"};
check_keyword(Line, Column, _Length, do, Tokens, _Rest) ->
case do_keyword_valid(Tokens) of
- true -> {ok, add_token_with_nl({do, {Line, Column, Column + 2}}, Tokens)};
+ true -> {ok, add_token_with_nl({do, {Line, {Column, Column + 2}, nil}}, Tokens)};
false -> {error, invalid_do_error("unexpected token \"do\""), "do"}
end;
check_keyword(Line, Column, Length, Atom, Tokens, Rest) ->
@@ -1065,15 +1096,15 @@ check_keyword(Line, Column, Length, Atom, Tokens, Rest) ->
false ->
nomatch;
token ->
- {ok, [{Atom, {Line, Column, Column + Length}} | Tokens]};
+ {ok, [{Atom, {Line, {Column, Column + Length}, nil}} | Tokens]};
block ->
- {ok, [{block_identifier, {Line, Column, Column + Length}, Atom} | Tokens]};
+ {ok, [{block_identifier, {Line, {Column, Column + Length}, nil}, Atom} | Tokens]};
Kind ->
case strip_horizontal_space(Rest) of
{[$/ | _], _} ->
- {ok, [{identifier, {Line, Column, Column + Length}, Atom} | Tokens]};
+ {ok, [{identifier, {Line, {Column, Column + Length}, nil}, Atom} | Tokens]};
_ ->
- {ok, add_token_with_nl({Kind, {Line, Column, Column + Length}, Atom}, Tokens)}
+ {ok, add_token_with_nl({Kind, {Line, {Column, Column + Length}, nil}, Atom}, Tokens)}
end
end.
diff --git a/lib/elixir/test/erlang/string_test.erl b/lib/elixir/test/erlang/string_test.erl
index 8ec706bf6..ca209cd7d 100644
--- a/lib/elixir/test/erlang/string_test.erl
+++ b/lib/elixir/test/erlang/string_test.erl
@@ -27,51 +27,51 @@ extract_interpolations_with_escaped_interpolation_test() ->
extract_interpolations_with_interpolation_test() ->
[<<"f">>,
- {{1, 2, 7}, [{atom, {1, 4, 6}, o}]},
+ {{1, {2, 7}, nil}, [{atom, {1, {4, 6}, nil}, o}]},
<<"o">>] = extract_interpolations("f#{:o}o").
extract_interpolations_with_two_interpolations_test() ->
[<<"f">>,
- {{1, 2, 7}, [{atom, {1, 4, 6}, o}]}, {{1, 7, 12}, [{atom, {1, 9, 11}, o}]},
+ {{1, {2, 7}, nil}, [{atom, {1, {4, 6}, nil}, o}]}, {{1, {7, 12}, nil}, [{atom, {1, {9, 11}, nil}, o}]},
<<"o">>] = extract_interpolations("f#{:o}#{:o}o").
extract_interpolations_with_only_two_interpolations_test() ->
- [{{1, 1, 6}, [{atom, {1, 3, 5}, o}]},
- {{1, 6, 11}, [{atom, {1, 8, 10}, o}]}] = extract_interpolations("#{:o}#{:o}").
+ [{{1, {1, 6}, nil}, [{atom, {1, {3, 5}, nil}, o}]},
+ {{1, {6, 11}, nil}, [{atom, {1, {8, 10}, nil}, o}]}] = extract_interpolations("#{:o}#{:o}").
extract_interpolations_with_tuple_inside_interpolation_test() ->
[<<"f">>,
- {{1, 2, 8}, [{'{', {1, 4, 5}}, {decimal, {1, 5, 6}, 1}, {'}', {1, 6, 7}}]},
+ {{1, {2, 8}, nil}, [{'{', {1, {4, 5}, nil}}, {decimal, {1, {5, 6}, nil}, 1}, {'}', {1, {6, 7}, nil}}]},
<<"o">>] = extract_interpolations("f#{{1}}o").
extract_interpolations_with_many_expressions_inside_interpolation_test() ->
[<<"f">>,
- {{1, 2, 3}, [{decimal, {1, 4, 5}, 1}, {eol, {1, 5, 6}}, {decimal, {2, 1, 2}, 2}]},
+ {{1, {2, 3}, nil}, [{decimal, {1, {4, 5}, nil}, 1}, {eol, {1, {5, 6}, nil}}, {decimal, {2, {1, 2}, nil}, 2}]},
<<"o">>] = extract_interpolations("f#{1\n2}o").
extract_interpolations_with_right_curly_inside_string_inside_interpolation_test() ->
[<<"f">>,
- {{1, 2, 10}, [{bin_string, {1, 4, 9}, [<<"f}o">>]}]},
+ {{1, {2, 10}, nil}, [{bin_string, {1, {4, 9}, nil}, [<<"f}o">>]}]},
<<"o">>] = extract_interpolations("f#{\"f}o\"}o").
extract_interpolations_with_left_curly_inside_string_inside_interpolation_test() ->
[<<"f">>,
- {{1, 2, 10}, [{bin_string, {1, 4, 9}, [<<"f{o">>]}]},
+ {{1, {2, 10}, nil}, [{bin_string, {1, {4, 9}, nil}, [<<"f{o">>]}]},
<<"o">>] = extract_interpolations("f#{\"f{o\"}o").
extract_interpolations_with_escaped_quote_inside_string_inside_interpolation_test() ->
[<<"f">>,
- {{1, 2, 11}, [{bin_string, {1, 4, 10}, [<<"f\"o">>]}]},
+ {{1, {2, 11}, nil}, [{bin_string, {1, {4, 10}, nil}, [<<"f\"o">>]}]},
<<"o">>] = extract_interpolations("f#{\"f\\\"o\"}o").
extract_interpolations_with_less_than_operation_inside_interpolation_test() ->
[<<"f">>,
- {{1, 2, 8}, [{decimal, {1, 4, 5}, 1}, {rel_op, {1, 5, 6}, '<'}, {decimal, {1, 6, 7}, 2}]},
+ {{1, {2, 8}, nil}, [{decimal, {1, {4, 5}, nil}, 1}, {rel_op, {1, {5, 6}, nil}, '<'}, {decimal, {1, {6, 7}, nil}, 2}]},
<<"o">>] = extract_interpolations("f#{1<2}o").
extract_interpolations_with_an_escaped_character_test() ->
[<<"f">>,
- {{1, 2, 17}, [{char, {1, 4, 7}, 7}, {rel_op, {1, 8, 9}, '>'}, {char, {1, 10, 13}, 7}]}
+ {{1, {2, 17}, nil}, [{char, {1, {4, 7}, nil}, 7}, {rel_op, {1, {8, 9}, nil}, '>'}, {char, {1, {10, 13}, nil}, 7}]}
] = extract_interpolations("f#{?\\a > ?\\a }").
extract_interpolations_with_invalid_expression_inside_interpolation_test() ->
diff --git a/lib/elixir/test/erlang/tokenizer_test.erl b/lib/elixir/test/erlang/tokenizer_test.erl
index da8c97474..94e522e1c 100644
--- a/lib/elixir/test/erlang/tokenizer_test.erl
+++ b/lib/elixir/test/erlang/tokenizer_test.erl
@@ -13,193 +13,210 @@ tokenize_error(String) ->
Error.
type_test() ->
- [{decimal, {1, 1, 2}, 1}, {type_op, {1, 3, 5}, '::'}, {decimal, {1, 6, 7}, 3}] = tokenize("1 :: 3"),
- [{identifier, {1, 1, 5}, name},
- {'.', {1, 5, 6}},
- {paren_identifier, {1, 6, 8}, '::'},
- {'(', {1, 8, 9}},
- {decimal, {1, 9, 10}, 3},
- {')', {1, 10, 11}}] = tokenize("name.::(3)").
+ [{decimal, {1, {1, 2}, nil}, 1},
+ {type_op, {1, {3, 5}, nil}, '::'},
+ {decimal, {1, {6, 7}, nil}, 3}] = tokenize("1 :: 3"),
+ [{identifier, {1, {1, 5}, nil}, name},
+ {'.', {1, {5, 6}, nil}},
+ {paren_identifier, {1, {6, 8}, nil}, '::'},
+ {'(', {1, {8, 9}, nil}},
+ {decimal, {1, {9, 10}, nil}, 3},
+ {')', {1, {10, 11}, nil}}] = tokenize("name.::(3)").
arithmetic_test() ->
- [{decimal, {1, 1, 2}, 1}, {dual_op, {1, 3, 4}, '+'}, {decimal, {1, 5, 6}, 2}, {dual_op, {1, 7, 8}, '+'}, {decimal, {1, 9, 10}, 3}] = tokenize("1 + 2 + 3").
+ [{decimal, {1, {1, 2}, nil}, 1},
+ {dual_op, {1, {3, 4}, nil}, '+'},
+ {decimal, {1, {5, 6}, nil}, 2},
+ {dual_op, {1, {7, 8}, nil}, '+'},
+ {decimal, {1, {9, 10}, nil}, 3}] = tokenize("1 + 2 + 3").
op_kw_test() ->
- [{atom, {1, 1, 5}, foo}, {dual_op, {1, 5, 6}, '+'}, {atom, {1, 6, 10}, bar}] = tokenize(":foo+:bar").
+ [{atom, {1, {1, 5}, nil}, foo},
+ {dual_op, {1, {5, 6}, nil}, '+'},
+ {atom, {1, {6, 10}, nil}, bar}] = tokenize(":foo+:bar").
scientific_test() ->
- [{float, {1, 1, 7}, 0.1}] = tokenize("1.0e-1"),
- [{float, {1, 1, 16}, 1.2345678e-7}] = tokenize("1_234.567_8e-10"),
+ [{float, {1, {1, 7}, nil}, 0.1}] = tokenize("1.0e-1"),
+ [{float, {1, {1, 16}, nil}, 1.2345678e-7}] = tokenize("1_234.567_8e-10"),
{1, "invalid float number ", "1.0e309"} = tokenize_error("1.0e309").
hex_bin_octal_test() ->
- [{hex, {1, 1, 5}, 255}] = tokenize("0xFF"),
- [{hex, {1, 1, 6}, 255}] = tokenize("0xF_F"),
- [{octal, {1, 1, 5}, 63}] = tokenize("0o77"),
- [{octal, {1, 1, 6}, 63}] = tokenize("0o7_7"),
- [{binary, {1, 1, 5}, 3}] = tokenize("0b11"),
- [{binary, {1, 1, 6}, 3}] = tokenize("0b1_1").
+ [{hex, {1, {1, 5}, nil}, 255}] = tokenize("0xFF"),
+ [{hex, {1, {1, 6}, nil}, 255}] = tokenize("0xF_F"),
+ [{octal, {1, {1, 5}, nil}, 63}] = tokenize("0o77"),
+ [{octal, {1, {1, 6}, nil}, 63}] = tokenize("0o7_7"),
+ [{binary, {1, {1, 5}, nil}, 3}] = tokenize("0b11"),
+ [{binary, {1, {1, 6}, nil}, 3}] = tokenize("0b1_1").
unquoted_atom_test() ->
- [{atom, {1, 1, 3}, '+'}] = tokenize(":+"),
- [{atom, {1, 1, 3}, '-'}] = tokenize(":-"),
- [{atom, {1, 1, 3}, '*'}] = tokenize(":*"),
- [{atom, {1, 1, 3}, '/'}] = tokenize(":/"),
- [{atom, {1, 1, 3}, '='}] = tokenize(":="),
- [{atom, {1, 1, 4}, '&&'}] = tokenize(":&&").
+ [{atom, {1, {1, 3}, nil}, '+'}] = tokenize(":+"),
+ [{atom, {1, {1, 3}, nil}, '-'}] = tokenize(":-"),
+ [{atom, {1, {1, 3}, nil}, '*'}] = tokenize(":*"),
+ [{atom, {1, {1, 3}, nil}, '/'}] = tokenize(":/"),
+ [{atom, {1, {1, 3}, nil}, '='}] = tokenize(":="),
+ [{atom, {1, {1, 4}, nil}, '&&'}] = tokenize(":&&").
quoted_atom_test() ->
- [{atom_unsafe, {1, 1, 11}, [<<"foo bar">>]}] = tokenize(":\"foo bar\"").
+ [{atom_unsafe, {1, {1, 11}, nil}, [<<"foo bar">>]}] = tokenize(":\"foo bar\"").
oversized_atom_test() ->
OversizedAtom = [$: | string:copies("a", 256)],
{1, "atom length must be less than system limit", ":"} = tokenize_error(OversizedAtom).
op_atom_test() ->
- [{atom, {1, 1, 6}, f0_1}] = tokenize(":f0_1").
+ [{atom, {1, {1, 6}, nil}, f0_1}] = tokenize(":f0_1").
kw_test() ->
- [{kw_identifier, {1, 1, 4}, do}] = tokenize("do: "),
- [{kw_identifier, {1, 1, 4}, a@}] = tokenize("a@: "),
- [{kw_identifier, {1, 1, 4}, 'A@'}] = tokenize("A@: "),
- [{kw_identifier, {1, 1, 5}, a@b}] = tokenize("a@b: "),
- [{kw_identifier, {1, 1, 5}, 'A@!'}] = tokenize("A@!: "),
- [{kw_identifier, {1, 1, 5}, 'a@!'}] = tokenize("a@!: "),
- [{kw_identifier_unsafe, {1, 1, 10}, [<<"foo bar">>]}] = tokenize("\"foo bar\": ").
+ [{kw_identifier, {1, {1, 4}, nil}, do}] = tokenize("do: "),
+ [{kw_identifier, {1, {1, 4}, nil}, a@}] = tokenize("a@: "),
+ [{kw_identifier, {1, {1, 4}, nil}, 'A@'}] = tokenize("A@: "),
+ [{kw_identifier, {1, {1, 5}, nil}, a@b}] = tokenize("a@b: "),
+ [{kw_identifier, {1, {1, 5}, nil}, 'A@!'}] = tokenize("A@!: "),
+ [{kw_identifier, {1, {1, 5}, nil}, 'a@!'}] = tokenize("a@!: "),
+ [{kw_identifier_unsafe, {1, {1, 10}, nil}, [<<"foo bar">>]}] = tokenize("\"foo bar\": ").
integer_test() ->
- [{decimal, {1, 1, 4}, 123}] = tokenize("123"),
- [{decimal, {1, 1, 4}, 123}, {';', {1, 4, 5}}] = tokenize("123;"),
- [{eol, {1, 1, 2}}, {decimal, {3, 1, 4}, 123}] = tokenize("\n\n123"),
- [{decimal, {1, 3, 6}, 123}, {decimal, {1, 8, 11}, 234}] = tokenize(" 123 234 ").
+ [{decimal, {1, {1, 4}, nil}, 123}] = tokenize("123"),
+ [{decimal, {1, {1, 4}, nil}, 123}, {';', {1, {4, 5}, nil}}] = tokenize("123;"),
+ [{eol, {1, {1, 2}, nil}}, {decimal, {3, {1, 4}, nil}, 123}] = tokenize("\n\n123"),
+ [{decimal, {1, {3, 6}, nil}, 123}, {decimal, {1, {8, 11}, nil}, 234}] = tokenize(" 123 234 ").
float_test() ->
- [{float, {1, 1, 5}, 12.3}] = tokenize("12.3"),
- [{float, {1, 1, 5}, 12.3}, {';', {1, 5, 6}}] = tokenize("12.3;"),
- [{eol, {1, 1, 2}}, {float, {3, 1, 5}, 12.3}] = tokenize("\n\n12.3"),
- [{float, {1, 3, 7}, 12.3}, {float, {1, 9, 13}, 23.4}] = tokenize(" 12.3 23.4 "),
+ [{float, {1, {1, 5}, nil}, 12.3}] = tokenize("12.3"),
+ [{float, {1, {1, 5}, nil}, 12.3}, {';', {1, {5, 6}, nil}}] = tokenize("12.3;"),
+ [{eol, {1, {1, 2}, nil}}, {float, {3, {1, 5}, nil}, 12.3}] = tokenize("\n\n12.3"),
+ [{float, {1, {3, 7}, nil}, 12.3}, {float, {1, {9, 13}, nil}, 23.4}] = tokenize(" 12.3 23.4 "),
OversizedFloat = string:copies("9", 310) ++ ".0",
{1, "invalid float number ", OversizedFloat} = tokenize_error(OversizedFloat).
comments_test() ->
- [{decimal, {1, 1, 2}, 1}, {eol, {1, 3, 4}}, {decimal, {2, 1, 2}, 2}] = tokenize("1 # Comment\n2"),
- [{decimal, {1, 1, 2}, 1}, {comment, {1, 3, 12}, "# Comment"},
- {eol, {1, 12, 13}}, {decimal, {2, 1, 2}, 2}] = tokenize("1 # Comment\n2", [{preserve_comments, true}]),
- [{comment, {1, 1, 10}, "# Comment"}] = tokenize("# Comment", [{preserve_comments, true}]).
+ [{decimal, {1, {1, 2}, nil}, 1},
+ {eol, {1, {3, 4}, nil}},
+ {decimal, {2, {1, 2}, nil}, 2}] = tokenize("1 # Comment\n2"),
+ [{decimal, {1, {1, 2}, nil}, 1},
+ {comment, {1, {3, 12}, nil}, "# Comment"},
+ {eol, {1, {12, 13}, nil}},
+ {decimal, {2, {1, 2}, nil}, 2}] = tokenize("1 # Comment\n2", [{preserve_comments, true}]),
+ [{comment, {1, {1, 10}, nil}, "# Comment"}] = tokenize("# Comment", [{preserve_comments, true}]).
identifier_test() ->
- [{identifier, {1, 1, 4}, abc}] = tokenize("abc "),
- [{identifier, {1, 1, 5}, 'abc?'}] = tokenize("abc?"),
- [{identifier, {1, 1, 5}, 'abc!'}] = tokenize("abc!"),
- [{identifier, {1, 1, 5}, 'a0c!'}] = tokenize("a0c!"),
- [{paren_identifier, {1, 1, 4}, 'a0c'}, {'(', {1, 4, 5}}, {')', {1, 5, 6}}] = tokenize("a0c()"),
- [{paren_identifier, {1, 1, 5}, 'a0c!'}, {'(', {1, 5, 6}}, {')', {1, 6, 7}}] = tokenize("a0c!()").
+ [{identifier, {1, {1, 4}, nil}, abc}] = tokenize("abc "),
+ [{identifier, {1, {1, 5}, nil}, 'abc?'}] = tokenize("abc?"),
+ [{identifier, {1, {1, 5}, nil}, 'abc!'}] = tokenize("abc!"),
+ [{identifier, {1, {1, 5}, nil}, 'a0c!'}] = tokenize("a0c!"),
+ [{paren_identifier, {1, {1, 4}, nil}, 'a0c'}, {'(', {1, {4, 5}, nil}}, {')', {1, {5, 6}, nil}}] = tokenize("a0c()"),
+ [{paren_identifier, {1, {1, 5}, nil}, 'a0c!'}, {'(', {1, {5, 6}, nil}}, {')', {1, {6, 7}, nil}}] = tokenize("a0c!()").
module_macro_test() ->
- [{identifier, {1, 1, 11}, '__MODULE__'}] = tokenize("__MODULE__").
+ [{identifier, {1, {1, 11}, nil}, '__MODULE__'}] = tokenize("__MODULE__").
triple_dot_test() ->
- [{identifier, {1, 1, 4}, '...'}] = tokenize("..."),
- [{'.', {1, 1, 2}}, {identifier, {1, 3, 5}, '..'}] = tokenize(". ..").
+ [{identifier, {1, {1, 4}, nil}, '...'}] = tokenize("..."),
+ [{'.', {1, {1, 2}, nil}}, {identifier, {1, {3, 5}, nil}, '..'}] = tokenize(". ..").
dot_test() ->
- [{identifier, {1, 1, 4}, foo},
- {'.', {1, 4, 5}},
- {identifier, {1, 5, 8}, bar},
- {'.', {1, 8, 9}},
- {identifier, {1, 9, 12}, baz}] = tokenize("foo.bar.baz").
+ [{identifier, {1, {1, 4}, nil}, foo},
+ {'.', {1, {4, 5}, nil}},
+ {identifier, {1, {5, 8}, nil}, bar},
+ {'.', {1, {8, 9}, nil}},
+ {identifier, {1, {9, 12}, nil}, baz}] = tokenize("foo.bar.baz").
dot_keyword_test() ->
- [{identifier, {1, 1, 4}, foo},
- {'.', {1, 4, 5}},
- {identifier, {1, 5, 7}, do}] = tokenize("foo.do").
+ [{identifier, {1, {1, 4}, nil}, foo},
+ {'.', {1, {4, 5}, nil}},
+ {identifier, {1, {5, 7}, nil}, do}] = tokenize("foo.do").
newline_test() ->
- [{identifier, {1, 1, 4}, foo},
- {'.', {2, 1, 2}},
- {identifier, {2, 2, 5}, bar}] = tokenize("foo\n.bar"),
- [{decimal, {1, 1, 2}, 1},
- {two_op, {2, 1, 3}, '++'},
- {decimal, {2, 3, 4}, 2}] = tokenize("1\n++2").
+ [{identifier, {1, {1, 4}, nil}, foo},
+ {'.', {2, {1, 2}, nil}},
+ {identifier, {2, {2, 5}, nil}, bar}] = tokenize("foo\n.bar"),
+ [{decimal, {1, {1, 2}, nil}, 1},
+ {two_op, {2, {1, 3}, nil}, '++'},
+ {decimal, {2, {3, 4}, nil}, 2}] = tokenize("1\n++2").
dot_newline_operator_test() ->
- [{identifier, {1, 1, 4}, foo},
- {'.', {1, 4, 5}},
- {identifier, {2, 1, 2}, '+'},
- {decimal, {2, 2, 3}, 1}] = tokenize("foo.\n+1"),
- [{identifier, {1, 1, 4}, foo},
- {'.', {1, 4, 5}},
- {identifier, {2, 1, 2}, '+'},
- {decimal, {2, 2, 3}, 1}] = tokenize("foo.#bar\n+1"),
- [{identifier, {1, 1, 4}, foo},
- {'.', {1, 4, 5}},
- {comment, {1, 5, 9}, "#bar"},
- {identifier, {2, 1, 2}, '+'},
- {decimal, {2, 2, 3}, 1}] = tokenize("foo.#bar\n+1", [{preserve_comments, true}]).
+ [{identifier, {1, {1, 4}, nil}, foo},
+ {'.', {1, {4, 5}, nil}},
+ {identifier, {2, {1, 2}, nil}, '+'},
+ {decimal, {2, {2, 3}, nil}, 1}] = tokenize("foo.\n+1"),
+ [{identifier, {1, {1, 4}, nil}, foo},
+ {'.', {1, {4, 5}, nil}},
+ {identifier, {2, {1, 2}, nil}, '+'},
+ {decimal, {2, {2, 3}, nil}, 1}] = tokenize("foo.#bar\n+1"),
+ [{identifier, {1, {1, 4}, nil}, foo},
+ {'.', {1, {4, 5}, nil}},
+ {comment, {1, {5, 9}, nil}, "#bar"},
+ {identifier, {2, {1, 2}, nil}, '+'},
+ {decimal, {2, {2, 3}, nil}, 1}] = tokenize("foo.#bar\n+1", [{preserve_comments, true}]).
aliases_test() ->
- [{'aliases', {1, 1, 4}, ['Foo']}] = tokenize("Foo"),
- [{'aliases', {1, 1, 4}, ['Foo']},
- {'.', {1, 4, 5}},
- {'aliases', {1, 5, 8}, ['Bar']},
- {'.', {1, 8, 9}},
- {'aliases', {1, 9, 12}, ['Baz']}] = tokenize("Foo.Bar.Baz").
+ [{'aliases', {1, {1, 4}, nil}, ['Foo']}] = tokenize("Foo"),
+ [{'aliases', {1, {1, 4}, nil}, ['Foo']},
+ {'.', {1, {4, 5}, nil}},
+ {'aliases', {1, {5, 8}, nil}, ['Bar']},
+ {'.', {1, {8, 9}, nil}},
+ {'aliases', {1, {9, 12}, nil}, ['Baz']}] = tokenize("Foo.Bar.Baz").
string_test() ->
- [{bin_string, {1, 1, 6}, [<<"foo">>]}] = tokenize("\"foo\""),
- [{bin_string, {1, 1, 6}, [<<"f\"">>]}] = tokenize("\"f\\\"\""),
- [{list_string, {1, 1, 6}, [<<"foo">>]}] = tokenize("'foo'").
+ [{bin_string, {1, {1, 6}, nil}, [<<"foo">>]}] = tokenize("\"foo\""),
+ [{bin_string, {1, {1, 6}, nil}, [<<"f\"">>]}] = tokenize("\"f\\\"\""),
+ [{list_string, {1, {1, 6}, nil}, [<<"foo">>]}] = tokenize("'foo'").
empty_string_test() ->
- [{bin_string, {1, 1, 3}, [<<>>]}] = tokenize("\"\""),
- [{list_string, {1, 1, 3}, [<<>>]}] = tokenize("''").
+ [{bin_string, {1, {1, 3}, nil}, [<<>>]}] = tokenize("\"\""),
+ [{list_string, {1, {1, 3}, nil}, [<<>>]}] = tokenize("''").
addadd_test() ->
- [{identifier, {1, 1, 2}, x}, {two_op, {1, 3, 5}, '++'}, {identifier, {1, 6, 7}, y}] = tokenize("x ++ y").
+ [{identifier, {1, {1, 2}, nil}, x},
+ {two_op, {1, {3, 5}, nil}, '++'},
+ {identifier, {1, {6, 7}, nil}, y}] = tokenize("x ++ y").
space_test() ->
- [{op_identifier, {1, 1, 4}, foo}, {dual_op, {1, 5, 6}, '-'}, {decimal, {1, 6, 7}, 2}] = tokenize("foo -2"),
- [{op_identifier, {1, 1, 4}, foo}, {dual_op, {1, 6, 7}, '-'}, {decimal, {1, 7, 8}, 2}] = tokenize("foo -2").
+ [{op_identifier, {1, {1, 4}, nil}, foo},
+ {dual_op, {1, {5, 6}, nil}, '-'},
+ {decimal, {1, {6, 7}, nil}, 2}] = tokenize("foo -2"),
+ [{op_identifier, {1, {1, 4}, nil}, foo},
+ {dual_op, {1, {6, 7}, nil}, '-'},
+ {decimal, {1, {7, 8}, nil}, 2}] = tokenize("foo -2").
chars_test() ->
- [{char, {1, 1, 3}, 97}] = tokenize("?a"),
- [{char, {1, 1, 3}, 99}] = tokenize("?c"),
- [{char, {1, 1, 4}, 0}] = tokenize("?\\0"),
- [{char, {1, 1, 4}, 7}] = tokenize("?\\a"),
- [{char, {1, 1, 4}, 10}] = tokenize("?\\n"),
- [{char, {1, 1, 4}, 92}] = tokenize("?\\\\").
+ [{char, {1, {1, 3}, nil}, 97}] = tokenize("?a"),
+ [{char, {1, {1, 3}, nil}, 99}] = tokenize("?c"),
+ [{char, {1, {1, 4}, nil}, 0}] = tokenize("?\\0"),
+ [{char, {1, {1, 4}, nil}, 7}] = tokenize("?\\a"),
+ [{char, {1, {1, 4}, nil}, 10}] = tokenize("?\\n"),
+ [{char, {1, {1, 4}, nil}, 92}] = tokenize("?\\\\").
interpolation_test() ->
- [{bin_string, {1, 1, 9}, [<<"f">>,
- {{1, 3, 8}, [{identifier, {1, 5, 7}, oo}]}]},
- {two_op, {1, 10, 12}, '<>'}, {bin_string, {1, 13, 15},
- [<<>>]}] = tokenize("\"f#{oo}\" <> \"\"").
+ [{bin_string, {1, {1, 9}, nil}, [<<"f">>, {{1, {3, 8}, nil}, [{identifier, {1, {5, 7}, nil}, oo}]}]},
+ {two_op, {1, {10, 12}, nil}, '<>'},
+ {bin_string, {1, {13, 15}, nil}, [<<>>]}] = tokenize("\"f#{oo}\" <> \"\"").
capture_test() ->
- [{capture_op, {1, 1, 2}, '&'},
- {identifier, {1, 2, 4}, '||'},
- {mult_op, {1, 4, 5}, '/'},
- {decimal, {1, 5, 6}, 2}] = tokenize("&||/2"),
- [{capture_op, {1, 1, 2}, '&'},
- {identifier, {1, 2, 4}, 'or'},
- {mult_op, {1, 4, 5}, '/'},
- {decimal, {1, 5, 6}, 2}] = tokenize("&or/2"),
- [{capture_op,{1,1,2},'&'},
- {unary_op,{1,2,5},'not'},
- {decimal,{1,6,7},1},
- {',',{1,7,8}},
- {decimal,{1,9,10},2}] = tokenize("&not 1, 2").
+ [{capture_op, {1, {1, 2}, nil}, '&'},
+ {identifier, {1, {2, 4}, nil}, '||'},
+ {mult_op, {1, {4, 5}, nil}, '/'},
+ {decimal, {1, {5, 6}, nil}, 2}] = tokenize("&||/2"),
+ [{capture_op, {1, {1, 2}, nil}, '&'},
+ {identifier, {1, {2, 4}, nil}, 'or'},
+ {mult_op, {1, {4, 5}, nil}, '/'},
+ {decimal, {1, {5, 6}, nil}, 2}] = tokenize("&or/2"),
+ [{capture_op, {1, {1, 2}, nil}, '&'},
+ {unary_op, {1, {2, 5}, nil}, 'not'},
+ {decimal, {1, {6, 7}, nil}, 1},
+ {',', {1, {7, 8}, nil}},
+ {decimal, {1, {9, 10}, nil}, 2}] = tokenize("&not 1, 2").
vc_merge_conflict_test() ->
{1, "found an unexpected version control marker, please resolve the conflicts: ", "<<<<<<< HEAD"} =
tokenize_error("<<<<<<< HEAD\n[1, 2, 3]").
sigil_terminator_test() ->
- [{sigil, {1, 1, 8}, 114, [<<"foo">>], [], "/"}] = tokenize("~r/foo/"),
- [{sigil, {1, 1, 8}, 114, [<<"foo">>], [], "["}] = tokenize("~r[foo]"),
- [{sigil, {1, 1, 8}, 114, [<<"foo">>], [], "\""}] = tokenize("~r\"foo\""),
- [{sigil, {1, 1, 1}, 83, [<<"sigil heredoc\n">>], [], "\"\"\""}] = tokenize("~S\"\"\"\nsigil heredoc\n\"\"\""),
- [{sigil, {1, 1, 1}, 83, [<<"sigil heredoc\n">>], [], "'''"}] = tokenize("~S'''\nsigil heredoc\n'''").
+ [{sigil, {1, {1, 8}, nil}, 114, [<<"foo">>], [], "/"}] = tokenize("~r/foo/"),
+ [{sigil, {1, {1, 8}, nil}, 114, [<<"foo">>], [], "["}] = tokenize("~r[foo]"),
+ [{sigil, {1, {1, 8}, nil}, 114, [<<"foo">>], [], "\""}] = tokenize("~r\"foo\""),
+ [{sigil, {1, {1, 1}, nil}, 83, [<<"sigil heredoc\n">>], [], "\"\"\""}] = tokenize("~S\"\"\"\nsigil heredoc\n\"\"\""),
+ [{sigil, {1, {1, 1}, nil}, 83, [<<"sigil heredoc\n">>], [], "'''"}] = tokenize("~S'''\nsigil heredoc\n'''").
invalid_sigil_delimiter_test() ->
{1, "invalid sigil delimiter: ", Message} = tokenize_error("~s\\"),