summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAndrea Leopardi <an.leopardi@gmail.com>2017-08-22 01:05:30 +0200
committerAndrea Leopardi <an.leopardi@gmail.com>2017-08-22 01:05:30 +0200
commit269c9f25e32a2e28118ba4e6a606d8ac3c8237d5 (patch)
tree6a5a7cb8ba9ce6e4e3ebee338556af518013790e
parent1dd4c16958663be472493044d0b6d84aa42b08d7 (diff)
downloadelixir-al/int-tokens.tar.gz
Rework how integers are tokenizedal/int-tokens
-rw-r--r--lib/elixir/src/elixir_parser.yrl19
-rw-r--r--lib/elixir/src/elixir_tokenizer.erl16
-rw-r--r--lib/elixir/test/elixir/code_test.exs12
-rw-r--r--lib/elixir/test/erlang/string_test.erl6
-rw-r--r--lib/elixir/test/erlang/tokenizer_test.erl70
5 files changed, 68 insertions, 55 deletions
diff --git a/lib/elixir/src/elixir_parser.yrl b/lib/elixir/src/elixir_parser.yrl
index 151668299..9aa7b9d5f 100644
--- a/lib/elixir/src/elixir_parser.yrl
+++ b/lib/elixir/src/elixir_parser.yrl
@@ -40,7 +40,7 @@ Terminals
capture_op rel_op
'true' 'false' 'nil' 'do' eol ';' ',' '.'
'(' ')' '[' ']' '{' '}' '<<' '>>' '%{}' '%'
- binary octal decimal float hex
+ base_integer decimal float
.
Rootsymbol grammar.
@@ -237,7 +237,7 @@ no_parens_zero_expr -> dot_identifier : build_identifier('$1', nil).
%% marks identifiers followed by brackets as bracket_identifier.
access_expr -> bracket_at_expr : '$1'.
access_expr -> bracket_expr : '$1'.
-access_expr -> capture_op_eol decimal : build_unary_op('$1', ?exprs('$2')).
+access_expr -> capture_op_eol decimal : build_unary_op('$1', parse_integer_literal(?exprs('$2'))).
access_expr -> fn_eoe stab end_eoe : build_fn('$1', reverse('$2')).
access_expr -> open_paren stab close_paren : build_stab(reverse('$2')).
access_expr -> open_paren stab ';' close_paren : build_stab(reverse('$2')).
@@ -262,10 +262,8 @@ access_expr -> max_expr : '$1'.
%% Augment integer literals with representation format if wrap_literals_in_blocks option is true
number -> char : handle_literal(?exprs('$1'), '$1', [{format, char}]).
-number -> binary : handle_literal(?exprs('$1'), '$1', [{format, binary}]).
-number -> octal : handle_literal(?exprs('$1'), '$1', [{format, octal}]).
-number -> decimal : handle_literal(?exprs('$1'), '$1', [{format, decimal}]).
-number -> hex : handle_literal(?exprs('$1'), '$1', [{format, hex}]).
+number -> decimal : handle_literal(parse_integer_literal(?exprs('$1')), '$1', [{original, ?exprs('$1')}]).
+number -> base_integer : handle_literal(parse_integer_literal(?exprs('$1')), '$1', [{original, ?exprs('$1')}]).
number -> float : handle_literal(?exprs('$1'), '$1').
%% Aliases and properly formed calls. Used by map_expr.
@@ -637,6 +635,15 @@ handle_literal(Literal, Token, ExtraMeta) ->
false -> Literal
end.
+parse_integer_literal([$0, $x | Rest]) ->
+ list_to_integer(Rest, 16);
+parse_integer_literal([$0, $o | Rest]) ->
+ list_to_integer(Rest, 8);
+parse_integer_literal([$0, $b | Rest]) ->
+ list_to_integer(Rest, 2);
+parse_integer_literal(Decimal) ->
+ list_to_integer(Decimal, 10).
+
%% Operators
build_op({_Kind, Location, 'in'}, {UOp, _, [Left]}, Right) when ?rearrange_uop(UOp) ->
diff --git a/lib/elixir/src/elixir_tokenizer.erl b/lib/elixir/src/elixir_tokenizer.erl
index 99f42836e..4f7c90ff2 100644
--- a/lib/elixir/src/elixir_tokenizer.erl
+++ b/lib/elixir/src/elixir_tokenizer.erl
@@ -166,15 +166,15 @@ tokenize(("<<<<<<<" ++ _) = Original, Line, 1, _Scope, Tokens) ->
tokenize([$0, $x, H | T], Line, Column, Scope, Tokens) when ?is_hex(H) ->
{Rest, Number, Length} = tokenize_hex(T, [H], 1),
- tokenize(Rest, Line, Column + 2 + Length, Scope, [{hex, {Line, Column, Column + 2 + Length}, Number} | Tokens]);
+ tokenize(Rest, Line, Column + 2 + Length, Scope, [{base_integer, {Line, Column, Column + 2 + Length}, Number} | Tokens]);
tokenize([$0, $b, H | T], Line, Column, Scope, Tokens) when ?is_bin(H) ->
{Rest, Number, Length} = tokenize_bin(T, [H], 1),
- tokenize(Rest, Line, Column + 2 + Length, Scope, [{binary, {Line, Column, Column + 2 + Length}, Number} | Tokens]);
+ tokenize(Rest, Line, Column + 2 + Length, Scope, [{base_integer, {Line, Column, Column + 2 + Length}, Number} | Tokens]);
tokenize([$0, $o, H | T], Line, Column, Scope, Tokens) when ?is_octal(H) ->
{Rest, Number, Length} = tokenize_octal(T, [H], 1),
- tokenize(Rest, Line, Column + 2 + Length, Scope, [{octal, {Line, Column, Column + 2 + Length}, Number} | Tokens]);
+ tokenize(Rest, Line, Column + 2 + Length, Scope, [{base_integer, {Line, Column, Column + 2 + Length}, Number} | Tokens]);
% Comments
@@ -420,7 +420,7 @@ tokenize([H | T], Line, Column, Scope, Tokens) when ?is_digit(H) ->
case tokenize_number(T, [H], 1, false) of
{error, Reason, Number} ->
{error, {Line, Reason, Number}, T, Tokens};
- {Rest, Number, Length} when is_integer(Number) ->
+ {Rest, Number, Length} when is_list(Number) ->
tokenize(Rest, Line, Column + Length, Scope, [{decimal, {Line, Column, Column + Length}, Number} | Tokens]);
{Rest, Number, Length} ->
tokenize(Rest, Line, Column + Length, Scope, [{float, {Line, Column, Column + Length}, Number} | Tokens])
@@ -831,28 +831,28 @@ tokenize_number(Rest, Acc, Length, true) ->
%% Or integer.
tokenize_number(Rest, Acc, Length, false) ->
- {Rest, list_to_integer(lists:reverse(Acc)), Length}.
+ {Rest, lists:reverse(Acc), Length}.
tokenize_hex([H | T], Acc, Length) when ?is_hex(H) ->
tokenize_hex(T, [H | Acc], Length + 1);
tokenize_hex([$_, H | T], Acc, Length) when ?is_hex(H) ->
tokenize_hex(T, [H | Acc], Length + 2);
tokenize_hex(Rest, Acc, Length) ->
- {Rest, list_to_integer(lists:reverse(Acc), 16), Length}.
+ {Rest, [$0, $x | lists:reverse(Acc)], Length}.
tokenize_octal([H | T], Acc, Length) when ?is_octal(H) ->
tokenize_octal(T, [H | Acc], Length + 1);
tokenize_octal([$_, H | T], Acc, Length) when ?is_octal(H) ->
tokenize_octal(T, [H | Acc], Length + 2);
tokenize_octal(Rest, Acc, Length) ->
- {Rest, list_to_integer(lists:reverse(Acc), 8), Length}.
+ {Rest, [$0, $o | lists:reverse(Acc)], Length}.
tokenize_bin([H | T], Acc, Length) when ?is_bin(H) ->
tokenize_bin(T, [H | Acc], Length + 1);
tokenize_bin([$_, H | T], Acc, Length) when ?is_bin(H) ->
tokenize_bin(T, [H | Acc], Length + 2);
tokenize_bin(Rest, Acc, Length) ->
- {Rest, list_to_integer(lists:reverse(Acc), 2), Length}.
+ {Rest, [$0, $b | lists:reverse(Acc)], Length}.
%% Comments
diff --git a/lib/elixir/test/elixir/code_test.exs b/lib/elixir/test/elixir/code_test.exs
index 6fc40983a..4e0b24c4a 100644
--- a/lib/elixir/test/elixir/code_test.exs
+++ b/lib/elixir/test/elixir/code_test.exs
@@ -97,7 +97,7 @@ defmodule CodeTest do
test "string_to_quoted/1" do
assert Code.string_to_quoted("1 + 2") == {:ok, {:+, [line: 1], [1, 2]}}
- assert Code.string_to_quoted("a.1") == {:error, {1, "syntax error before: ", "1"}}
+ assert Code.string_to_quoted("a.1") == {:error, {1, "syntax error before: ", "\"1\""}}
end
test "string_to_quoted/1 for presence of sigils terminators" do
@@ -133,15 +133,15 @@ defmodule CodeTest do
assert Code.string_to_quoted("\"one\"", wrap_literals_in_blocks: true) == {:ok, {:__block__, [line: 1], ["one"]}}
assert Code.string_to_quoted("\"one\"") == {:ok, "one"}
assert Code.string_to_quoted("?é", wrap_literals_in_blocks: true) == {:ok, {:__block__, [format: :char, line: 1], [233]}}
- assert Code.string_to_quoted("0b10", wrap_literals_in_blocks: true) == {:ok, {:__block__, [format: :binary, line: 1], [2]}}
- assert Code.string_to_quoted("12", wrap_literals_in_blocks: true) == {:ok, {:__block__, [format: :decimal, line: 1], [12]}}
- assert Code.string_to_quoted("0o123", wrap_literals_in_blocks: true) == {:ok, {:__block__, [format: :octal, line: 1], [83]}}
- assert Code.string_to_quoted("0xEF", wrap_literals_in_blocks: true) == {:ok, {:__block__, [format: :hex, line: 1], [239]}}
+ assert Code.string_to_quoted("0b10", wrap_literals_in_blocks: true) == {:ok, {:__block__, [original: '0b10', line: 1], [2]}}
+ assert Code.string_to_quoted("12", wrap_literals_in_blocks: true) == {:ok, {:__block__, [original: '12', line: 1], [12]}}
+ assert Code.string_to_quoted("0o123", wrap_literals_in_blocks: true) == {:ok, {:__block__, [original: '0o123', line: 1], [83]}}
+ assert Code.string_to_quoted("0xEF", wrap_literals_in_blocks: true) == {:ok, {:__block__, [original: '0xEF', line: 1], [239]}}
assert Code.string_to_quoted("12.3", wrap_literals_in_blocks: true) == {:ok, {:__block__, [line: 1], [12.3]}}
assert Code.string_to_quoted("nil", wrap_literals_in_blocks: true) == {:ok, {:__block__, [line: 1], [nil]}}
assert Code.string_to_quoted(":one", wrap_literals_in_blocks: true) == {:ok, {:__block__, [line: 1], [:one]}}
assert Code.string_to_quoted("[1]", wrap_literals_in_blocks: true) ==
- {:ok, {:__block__, [line: 1], [[{:__block__, [format: :decimal, line: 1], [1]}]]}}
+ {:ok, {:__block__, [line: 1], [[{:__block__, [original: '1', line: 1], [1]}]]}}
assert Code.string_to_quoted("{:ok, :test}", wrap_literals_in_blocks: true) ==
{:ok, {:__block__, [line: 1], [{{:__block__, [line: 1], [:ok]}, {:__block__, [line: 1], [:test]}}]}}
assert Code.string_to_quoted("\"\"\"\nhello\n\"\"\"", wrap_literals_in_blocks: true)
diff --git a/lib/elixir/test/erlang/string_test.erl b/lib/elixir/test/erlang/string_test.erl
index 8ec706bf6..e55fcf835 100644
--- a/lib/elixir/test/erlang/string_test.erl
+++ b/lib/elixir/test/erlang/string_test.erl
@@ -41,12 +41,12 @@ extract_interpolations_with_only_two_interpolations_test() ->
extract_interpolations_with_tuple_inside_interpolation_test() ->
[<<"f">>,
- {{1, 2, 8}, [{'{', {1, 4, 5}}, {decimal, {1, 5, 6}, 1}, {'}', {1, 6, 7}}]},
+ {{1, 2, 8}, [{'{', {1, 4, 5}}, {decimal, {1, 5, 6}, "1"}, {'}', {1, 6, 7}}]},
<<"o">>] = extract_interpolations("f#{{1}}o").
extract_interpolations_with_many_expressions_inside_interpolation_test() ->
[<<"f">>,
- {{1, 2, 3}, [{decimal, {1, 4, 5}, 1}, {eol, {1, 5, 6}}, {decimal, {2, 1, 2}, 2}]},
+ {{1, 2, 3}, [{decimal, {1, 4, 5}, "1"}, {eol, {1, 5, 6}}, {decimal, {2, 1, 2}, "2"}]},
<<"o">>] = extract_interpolations("f#{1\n2}o").
extract_interpolations_with_right_curly_inside_string_inside_interpolation_test() ->
@@ -66,7 +66,7 @@ extract_interpolations_with_escaped_quote_inside_string_inside_interpolation_tes
extract_interpolations_with_less_than_operation_inside_interpolation_test() ->
[<<"f">>,
- {{1, 2, 8}, [{decimal, {1, 4, 5}, 1}, {rel_op, {1, 5, 6}, '<'}, {decimal, {1, 6, 7}, 2}]},
+ {{1, 2, 8}, [{decimal, {1, 4, 5}, "1"}, {rel_op, {1, 5, 6}, '<'}, {decimal, {1, 6, 7}, "2"}]},
<<"o">>] = extract_interpolations("f#{1<2}o").
extract_interpolations_with_an_escaped_character_test() ->
diff --git a/lib/elixir/test/erlang/tokenizer_test.erl b/lib/elixir/test/erlang/tokenizer_test.erl
index da8c97474..b94017090 100644
--- a/lib/elixir/test/erlang/tokenizer_test.erl
+++ b/lib/elixir/test/erlang/tokenizer_test.erl
@@ -13,16 +13,20 @@ tokenize_error(String) ->
Error.
type_test() ->
- [{decimal, {1, 1, 2}, 1}, {type_op, {1, 3, 5}, '::'}, {decimal, {1, 6, 7}, 3}] = tokenize("1 :: 3"),
+ [{decimal, {1, 1, 2}, "1"}, {type_op, {1, 3, 5}, '::'}, {decimal, {1, 6, 7}, "3"}] = tokenize("1 :: 3"),
[{identifier, {1, 1, 5}, name},
{'.', {1, 5, 6}},
{paren_identifier, {1, 6, 8}, '::'},
{'(', {1, 8, 9}},
- {decimal, {1, 9, 10}, 3},
+ {decimal, {1, 9, 10}, "3"},
{')', {1, 10, 11}}] = tokenize("name.::(3)").
arithmetic_test() ->
- [{decimal, {1, 1, 2}, 1}, {dual_op, {1, 3, 4}, '+'}, {decimal, {1, 5, 6}, 2}, {dual_op, {1, 7, 8}, '+'}, {decimal, {1, 9, 10}, 3}] = tokenize("1 + 2 + 3").
+ [{decimal, {1, 1, 2}, "1"},
+ {dual_op, {1, 3, 4}, '+'},
+ {decimal, {1, 5, 6}, "2"},
+ {dual_op, {1, 7, 8}, '+'},
+ {decimal, {1, 9, 10}, "3"}] = tokenize("1 + 2 + 3").
op_kw_test() ->
[{atom, {1, 1, 5}, foo}, {dual_op, {1, 5, 6}, '+'}, {atom, {1, 6, 10}, bar}] = tokenize(":foo+:bar").
@@ -33,12 +37,12 @@ scientific_test() ->
{1, "invalid float number ", "1.0e309"} = tokenize_error("1.0e309").
hex_bin_octal_test() ->
- [{hex, {1, 1, 5}, 255}] = tokenize("0xFF"),
- [{hex, {1, 1, 6}, 255}] = tokenize("0xF_F"),
- [{octal, {1, 1, 5}, 63}] = tokenize("0o77"),
- [{octal, {1, 1, 6}, 63}] = tokenize("0o7_7"),
- [{binary, {1, 1, 5}, 3}] = tokenize("0b11"),
- [{binary, {1, 1, 6}, 3}] = tokenize("0b1_1").
+ [{base_integer, {1, 1, 5}, "0xFF"}] = tokenize("0xFF"),
+ [{base_integer, {1, 1, 6}, "0xFF"}] = tokenize("0xF_F"),
+ [{base_integer, {1, 1, 5}, "0o77"}] = tokenize("0o77"),
+ [{base_integer, {1, 1, 6}, "0o77"}] = tokenize("0o7_7"),
+ [{base_integer, {1, 1, 5}, "0b11"}] = tokenize("0b11"),
+ [{base_integer, {1, 1, 6}, "0b11"}] = tokenize("0b1_1").
unquoted_atom_test() ->
[{atom, {1, 1, 3}, '+'}] = tokenize(":+"),
@@ -68,10 +72,10 @@ kw_test() ->
[{kw_identifier_unsafe, {1, 1, 10}, [<<"foo bar">>]}] = tokenize("\"foo bar\": ").
integer_test() ->
- [{decimal, {1, 1, 4}, 123}] = tokenize("123"),
- [{decimal, {1, 1, 4}, 123}, {';', {1, 4, 5}}] = tokenize("123;"),
- [{eol, {1, 1, 2}}, {decimal, {3, 1, 4}, 123}] = tokenize("\n\n123"),
- [{decimal, {1, 3, 6}, 123}, {decimal, {1, 8, 11}, 234}] = tokenize(" 123 234 ").
+ [{decimal, {1, 1, 4}, "123"}] = tokenize("123"),
+ [{decimal, {1, 1, 4}, "123"}, {';', {1, 4, 5}}] = tokenize("123;"),
+ [{eol, {1, 1, 2}}, {decimal, {3, 1, 4}, "123"}] = tokenize("\n\n123"),
+ [{decimal, {1, 3, 6}, "123"}, {decimal, {1, 8, 11}, "234"}] = tokenize(" 123 234 ").
float_test() ->
[{float, {1, 1, 5}, 12.3}] = tokenize("12.3"),
@@ -82,9 +86,11 @@ float_test() ->
{1, "invalid float number ", OversizedFloat} = tokenize_error(OversizedFloat).
comments_test() ->
- [{decimal, {1, 1, 2}, 1}, {eol, {1, 3, 4}}, {decimal, {2, 1, 2}, 2}] = tokenize("1 # Comment\n2"),
- [{decimal, {1, 1, 2}, 1}, {comment, {1, 3, 12}, "# Comment"},
- {eol, {1, 12, 13}}, {decimal, {2, 1, 2}, 2}] = tokenize("1 # Comment\n2", [{preserve_comments, true}]),
+ [{decimal, {1, 1, 2}, "1"}, {eol, {1, 3, 4}}, {decimal, {2, 1, 2}, "2"}] = tokenize("1 # Comment\n2"),
+ [{decimal, {1, 1, 2}, "1"},
+ {comment, {1, 3, 12}, "# Comment"},
+ {eol, {1, 12, 13}},
+ {decimal, {2, 1, 2}, "2"}] = tokenize("1 # Comment\n2", [{preserve_comments, true}]),
[{comment, {1, 1, 10}, "# Comment"}] = tokenize("# Comment", [{preserve_comments, true}]).
identifier_test() ->
@@ -118,24 +124,24 @@ newline_test() ->
[{identifier, {1, 1, 4}, foo},
{'.', {2, 1, 2}},
{identifier, {2, 2, 5}, bar}] = tokenize("foo\n.bar"),
- [{decimal, {1, 1, 2}, 1},
+ [{decimal, {1, 1, 2}, "1"},
{two_op, {2, 1, 3}, '++'},
- {decimal, {2, 3, 4}, 2}] = tokenize("1\n++2").
+ {decimal, {2, 3, 4}, "2"}] = tokenize("1\n++2").
dot_newline_operator_test() ->
[{identifier, {1, 1, 4}, foo},
{'.', {1, 4, 5}},
{identifier, {2, 1, 2}, '+'},
- {decimal, {2, 2, 3}, 1}] = tokenize("foo.\n+1"),
+ {decimal, {2, 2, 3}, "1"}] = tokenize("foo.\n+1"),
[{identifier, {1, 1, 4}, foo},
{'.', {1, 4, 5}},
{identifier, {2, 1, 2}, '+'},
- {decimal, {2, 2, 3}, 1}] = tokenize("foo.#bar\n+1"),
+ {decimal, {2, 2, 3}, "1"}] = tokenize("foo.#bar\n+1"),
[{identifier, {1, 1, 4}, foo},
{'.', {1, 4, 5}},
{comment, {1, 5, 9}, "#bar"},
{identifier, {2, 1, 2}, '+'},
- {decimal, {2, 2, 3}, 1}] = tokenize("foo.#bar\n+1", [{preserve_comments, true}]).
+ {decimal, {2, 2, 3}, "1"}] = tokenize("foo.#bar\n+1", [{preserve_comments, true}]).
aliases_test() ->
[{'aliases', {1, 1, 4}, ['Foo']}] = tokenize("Foo"),
@@ -158,8 +164,8 @@ addadd_test() ->
[{identifier, {1, 1, 2}, x}, {two_op, {1, 3, 5}, '++'}, {identifier, {1, 6, 7}, y}] = tokenize("x ++ y").
space_test() ->
- [{op_identifier, {1, 1, 4}, foo}, {dual_op, {1, 5, 6}, '-'}, {decimal, {1, 6, 7}, 2}] = tokenize("foo -2"),
- [{op_identifier, {1, 1, 4}, foo}, {dual_op, {1, 6, 7}, '-'}, {decimal, {1, 7, 8}, 2}] = tokenize("foo -2").
+ [{op_identifier, {1, 1, 4}, foo}, {dual_op, {1, 5, 6}, '-'}, {decimal, {1, 6, 7}, "2"}] = tokenize("foo -2"),
+ [{op_identifier, {1, 1, 4}, foo}, {dual_op, {1, 6, 7}, '-'}, {decimal, {1, 7, 8}, "2"}] = tokenize("foo -2").
chars_test() ->
[{char, {1, 1, 3}, 97}] = tokenize("?a"),
@@ -178,17 +184,17 @@ interpolation_test() ->
capture_test() ->
[{capture_op, {1, 1, 2}, '&'},
{identifier, {1, 2, 4}, '||'},
- {mult_op, {1, 4, 5}, '/'},
- {decimal, {1, 5, 6}, 2}] = tokenize("&||/2"),
+ {mult_op, {1, 4, 5}, '/'},
+ {decimal, {1, 5, 6}, "2"}] = tokenize("&||/2"),
[{capture_op, {1, 1, 2}, '&'},
{identifier, {1, 2, 4}, 'or'},
- {mult_op, {1, 4, 5}, '/'},
- {decimal, {1, 5, 6}, 2}] = tokenize("&or/2"),
- [{capture_op,{1,1,2},'&'},
- {unary_op,{1,2,5},'not'},
- {decimal,{1,6,7},1},
- {',',{1,7,8}},
- {decimal,{1,9,10},2}] = tokenize("&not 1, 2").
+ {mult_op, {1, 4, 5}, '/'},
+ {decimal, {1, 5, 6}, "2"}] = tokenize("&or/2"),
+ [{capture_op, {1, 1, 2}, '&'},
+ {unary_op, {1, 2, 5}, 'not'},
+ {decimal, {1, 6, 7}, "1"},
+ {',', {1, 7, 8}},
+   {decimal, {1, 9, 10}, "2"}] = tokenize("&not 1, 2").
vc_merge_conflict_test() ->
{1, "found an unexpected version control marker, please resolve the conflicts: ", "<<<<<<< HEAD"} =