diff options
author | Andrea Leopardi <an.leopardi@gmail.com> | 2017-08-22 01:05:30 +0200 |
---|---|---|
committer | Andrea Leopardi <an.leopardi@gmail.com> | 2017-08-22 01:05:30 +0200 |
commit | 269c9f25e32a2e28118ba4e6a606d8ac3c8237d5 (patch) | |
tree | 6a5a7cb8ba9ce6e4e3ebee338556af518013790e | |
parent | 1dd4c16958663be472493044d0b6d84aa42b08d7 (diff) | |
download | elixir-al/int-tokens.tar.gz |
Rework how integers are tokenized (branch: al/int-tokens)
-rw-r--r-- | lib/elixir/src/elixir_parser.yrl | 19 | ||||
-rw-r--r-- | lib/elixir/src/elixir_tokenizer.erl | 16 | ||||
-rw-r--r-- | lib/elixir/test/elixir/code_test.exs | 12 | ||||
-rw-r--r-- | lib/elixir/test/erlang/string_test.erl | 6 | ||||
-rw-r--r-- | lib/elixir/test/erlang/tokenizer_test.erl | 70 |
5 files changed, 68 insertions, 55 deletions
diff --git a/lib/elixir/src/elixir_parser.yrl b/lib/elixir/src/elixir_parser.yrl index 151668299..9aa7b9d5f 100644 --- a/lib/elixir/src/elixir_parser.yrl +++ b/lib/elixir/src/elixir_parser.yrl @@ -40,7 +40,7 @@ Terminals capture_op rel_op 'true' 'false' 'nil' 'do' eol ';' ',' '.' '(' ')' '[' ']' '{' '}' '<<' '>>' '%{}' '%' - binary octal decimal float hex + base_integer decimal float . Rootsymbol grammar. @@ -237,7 +237,7 @@ no_parens_zero_expr -> dot_identifier : build_identifier('$1', nil). %% marks identifiers followed by brackets as bracket_identifier. access_expr -> bracket_at_expr : '$1'. access_expr -> bracket_expr : '$1'. -access_expr -> capture_op_eol decimal : build_unary_op('$1', ?exprs('$2')). +access_expr -> capture_op_eol decimal : build_unary_op('$1', parse_integer_literal(?exprs('$2'))). access_expr -> fn_eoe stab end_eoe : build_fn('$1', reverse('$2')). access_expr -> open_paren stab close_paren : build_stab(reverse('$2')). access_expr -> open_paren stab ';' close_paren : build_stab(reverse('$2')). @@ -262,10 +262,8 @@ access_expr -> max_expr : '$1'. %% Augment integer literals with representation format if wrap_literals_in_blocks option is true number -> char : handle_literal(?exprs('$1'), '$1', [{format, char}]). -number -> binary : handle_literal(?exprs('$1'), '$1', [{format, binary}]). -number -> octal : handle_literal(?exprs('$1'), '$1', [{format, octal}]). -number -> decimal : handle_literal(?exprs('$1'), '$1', [{format, decimal}]). -number -> hex : handle_literal(?exprs('$1'), '$1', [{format, hex}]). +number -> decimal : handle_literal(parse_integer_literal(?exprs('$1')), '$1', [{original, ?exprs('$1')}]). +number -> base_integer : handle_literal(parse_integer_literal(?exprs('$1')), '$1', [{original, ?exprs('$1')}]). number -> float : handle_literal(?exprs('$1'), '$1'). %% Aliases and properly formed calls. Used by map_expr. @@ -637,6 +635,15 @@ handle_literal(Literal, Token, ExtraMeta) -> false -> Literal end. 
+parse_integer_literal([$0, $x | Rest]) -> + list_to_integer(Rest, 16); +parse_integer_literal([$0, $o | Rest]) -> + list_to_integer(Rest, 8); +parse_integer_literal([$0, $b | Rest]) -> + list_to_integer(Rest, 2); +parse_integer_literal(Decimal) -> + list_to_integer(Decimal, 10). + %% Operators build_op({_Kind, Location, 'in'}, {UOp, _, [Left]}, Right) when ?rearrange_uop(UOp) -> diff --git a/lib/elixir/src/elixir_tokenizer.erl b/lib/elixir/src/elixir_tokenizer.erl index 99f42836e..4f7c90ff2 100644 --- a/lib/elixir/src/elixir_tokenizer.erl +++ b/lib/elixir/src/elixir_tokenizer.erl @@ -166,15 +166,15 @@ tokenize(("<<<<<<<" ++ _) = Original, Line, 1, _Scope, Tokens) -> tokenize([$0, $x, H | T], Line, Column, Scope, Tokens) when ?is_hex(H) -> {Rest, Number, Length} = tokenize_hex(T, [H], 1), - tokenize(Rest, Line, Column + 2 + Length, Scope, [{hex, {Line, Column, Column + 2 + Length}, Number} | Tokens]); + tokenize(Rest, Line, Column + 2 + Length, Scope, [{base_integer, {Line, Column, Column + 2 + Length}, Number} | Tokens]); tokenize([$0, $b, H | T], Line, Column, Scope, Tokens) when ?is_bin(H) -> {Rest, Number, Length} = tokenize_bin(T, [H], 1), - tokenize(Rest, Line, Column + 2 + Length, Scope, [{binary, {Line, Column, Column + 2 + Length}, Number} | Tokens]); + tokenize(Rest, Line, Column + 2 + Length, Scope, [{base_integer, {Line, Column, Column + 2 + Length}, Number} | Tokens]); tokenize([$0, $o, H | T], Line, Column, Scope, Tokens) when ?is_octal(H) -> {Rest, Number, Length} = tokenize_octal(T, [H], 1), - tokenize(Rest, Line, Column + 2 + Length, Scope, [{octal, {Line, Column, Column + 2 + Length}, Number} | Tokens]); + tokenize(Rest, Line, Column + 2 + Length, Scope, [{base_integer, {Line, Column, Column + 2 + Length}, Number} | Tokens]); % Comments @@ -420,7 +420,7 @@ tokenize([H | T], Line, Column, Scope, Tokens) when ?is_digit(H) -> case tokenize_number(T, [H], 1, false) of {error, Reason, Number} -> {error, {Line, Reason, Number}, T, Tokens}; - {Rest, 
Number, Length} when is_integer(Number) -> + {Rest, Number, Length} when is_list(Number) -> tokenize(Rest, Line, Column + Length, Scope, [{decimal, {Line, Column, Column + Length}, Number} | Tokens]); {Rest, Number, Length} -> tokenize(Rest, Line, Column + Length, Scope, [{float, {Line, Column, Column + Length}, Number} | Tokens]) @@ -831,28 +831,28 @@ tokenize_number(Rest, Acc, Length, true) -> %% Or integer. tokenize_number(Rest, Acc, Length, false) -> - {Rest, list_to_integer(lists:reverse(Acc)), Length}. + {Rest, lists:reverse(Acc), Length}. tokenize_hex([H | T], Acc, Length) when ?is_hex(H) -> tokenize_hex(T, [H | Acc], Length + 1); tokenize_hex([$_, H | T], Acc, Length) when ?is_hex(H) -> tokenize_hex(T, [H | Acc], Length + 2); tokenize_hex(Rest, Acc, Length) -> - {Rest, list_to_integer(lists:reverse(Acc), 16), Length}. + {Rest, [$0, $x | lists:reverse(Acc)], Length}. tokenize_octal([H | T], Acc, Length) when ?is_octal(H) -> tokenize_octal(T, [H | Acc], Length + 1); tokenize_octal([$_, H | T], Acc, Length) when ?is_octal(H) -> tokenize_octal(T, [H | Acc], Length + 2); tokenize_octal(Rest, Acc, Length) -> - {Rest, list_to_integer(lists:reverse(Acc), 8), Length}. + {Rest, [$0, $o | lists:reverse(Acc)], Length}. tokenize_bin([H | T], Acc, Length) when ?is_bin(H) -> tokenize_bin(T, [H | Acc], Length + 1); tokenize_bin([$_, H | T], Acc, Length) when ?is_bin(H) -> tokenize_bin(T, [H | Acc], Length + 2); tokenize_bin(Rest, Acc, Length) -> - {Rest, list_to_integer(lists:reverse(Acc), 2), Length}. + {Rest, [$0, $b | lists:reverse(Acc)], Length}. 
%% Comments diff --git a/lib/elixir/test/elixir/code_test.exs b/lib/elixir/test/elixir/code_test.exs index 6fc40983a..4e0b24c4a 100644 --- a/lib/elixir/test/elixir/code_test.exs +++ b/lib/elixir/test/elixir/code_test.exs @@ -97,7 +97,7 @@ defmodule CodeTest do test "string_to_quoted/1" do assert Code.string_to_quoted("1 + 2") == {:ok, {:+, [line: 1], [1, 2]}} - assert Code.string_to_quoted("a.1") == {:error, {1, "syntax error before: ", "1"}} + assert Code.string_to_quoted("a.1") == {:error, {1, "syntax error before: ", "\"1\""}} end test "string_to_quoted/1 for presence of sigils terminators" do @@ -133,15 +133,15 @@ defmodule CodeTest do assert Code.string_to_quoted("\"one\"", wrap_literals_in_blocks: true) == {:ok, {:__block__, [line: 1], ["one"]}} assert Code.string_to_quoted("\"one\"") == {:ok, "one"} assert Code.string_to_quoted("?é", wrap_literals_in_blocks: true) == {:ok, {:__block__, [format: :char, line: 1], [233]}} - assert Code.string_to_quoted("0b10", wrap_literals_in_blocks: true) == {:ok, {:__block__, [format: :binary, line: 1], [2]}} - assert Code.string_to_quoted("12", wrap_literals_in_blocks: true) == {:ok, {:__block__, [format: :decimal, line: 1], [12]}} - assert Code.string_to_quoted("0o123", wrap_literals_in_blocks: true) == {:ok, {:__block__, [format: :octal, line: 1], [83]}} - assert Code.string_to_quoted("0xEF", wrap_literals_in_blocks: true) == {:ok, {:__block__, [format: :hex, line: 1], [239]}} + assert Code.string_to_quoted("0b10", wrap_literals_in_blocks: true) == {:ok, {:__block__, [original: '0b10', line: 1], [2]}} + assert Code.string_to_quoted("12", wrap_literals_in_blocks: true) == {:ok, {:__block__, [original: '12', line: 1], [12]}} + assert Code.string_to_quoted("0o123", wrap_literals_in_blocks: true) == {:ok, {:__block__, [original: '0o123', line: 1], [83]}} + assert Code.string_to_quoted("0xEF", wrap_literals_in_blocks: true) == {:ok, {:__block__, [original: '0xEF', line: 1], [239]}} assert Code.string_to_quoted("12.3", 
wrap_literals_in_blocks: true) == {:ok, {:__block__, [line: 1], [12.3]}} assert Code.string_to_quoted("nil", wrap_literals_in_blocks: true) == {:ok, {:__block__, [line: 1], [nil]}} assert Code.string_to_quoted(":one", wrap_literals_in_blocks: true) == {:ok, {:__block__, [line: 1], [:one]}} assert Code.string_to_quoted("[1]", wrap_literals_in_blocks: true) == - {:ok, {:__block__, [line: 1], [[{:__block__, [format: :decimal, line: 1], [1]}]]}} + {:ok, {:__block__, [line: 1], [[{:__block__, [original: '1', line: 1], [1]}]]}} assert Code.string_to_quoted("{:ok, :test}", wrap_literals_in_blocks: true) == {:ok, {:__block__, [line: 1], [{{:__block__, [line: 1], [:ok]}, {:__block__, [line: 1], [:test]}}]}} assert Code.string_to_quoted("\"\"\"\nhello\n\"\"\"", wrap_literals_in_blocks: true) diff --git a/lib/elixir/test/erlang/string_test.erl b/lib/elixir/test/erlang/string_test.erl index 8ec706bf6..e55fcf835 100644 --- a/lib/elixir/test/erlang/string_test.erl +++ b/lib/elixir/test/erlang/string_test.erl @@ -41,12 +41,12 @@ extract_interpolations_with_only_two_interpolations_test() -> extract_interpolations_with_tuple_inside_interpolation_test() -> [<<"f">>, - {{1, 2, 8}, [{'{', {1, 4, 5}}, {decimal, {1, 5, 6}, 1}, {'}', {1, 6, 7}}]}, + {{1, 2, 8}, [{'{', {1, 4, 5}}, {decimal, {1, 5, 6}, "1"}, {'}', {1, 6, 7}}]}, <<"o">>] = extract_interpolations("f#{{1}}o"). extract_interpolations_with_many_expressions_inside_interpolation_test() -> [<<"f">>, - {{1, 2, 3}, [{decimal, {1, 4, 5}, 1}, {eol, {1, 5, 6}}, {decimal, {2, 1, 2}, 2}]}, + {{1, 2, 3}, [{decimal, {1, 4, 5}, "1"}, {eol, {1, 5, 6}}, {decimal, {2, 1, 2}, "2"}]}, <<"o">>] = extract_interpolations("f#{1\n2}o"). 
extract_interpolations_with_right_curly_inside_string_inside_interpolation_test() -> @@ -66,7 +66,7 @@ extract_interpolations_with_escaped_quote_inside_string_inside_interpolation_tes extract_interpolations_with_less_than_operation_inside_interpolation_test() -> [<<"f">>, - {{1, 2, 8}, [{decimal, {1, 4, 5}, 1}, {rel_op, {1, 5, 6}, '<'}, {decimal, {1, 6, 7}, 2}]}, + {{1, 2, 8}, [{decimal, {1, 4, 5}, "1"}, {rel_op, {1, 5, 6}, '<'}, {decimal, {1, 6, 7}, "2"}]}, <<"o">>] = extract_interpolations("f#{1<2}o"). extract_interpolations_with_an_escaped_character_test() -> diff --git a/lib/elixir/test/erlang/tokenizer_test.erl b/lib/elixir/test/erlang/tokenizer_test.erl index da8c97474..b94017090 100644 --- a/lib/elixir/test/erlang/tokenizer_test.erl +++ b/lib/elixir/test/erlang/tokenizer_test.erl @@ -13,16 +13,20 @@ tokenize_error(String) -> Error. type_test() -> - [{decimal, {1, 1, 2}, 1}, {type_op, {1, 3, 5}, '::'}, {decimal, {1, 6, 7}, 3}] = tokenize("1 :: 3"), + [{decimal, {1, 1, 2}, "1"}, {type_op, {1, 3, 5}, '::'}, {decimal, {1, 6, 7}, "3"}] = tokenize("1 :: 3"), [{identifier, {1, 1, 5}, name}, {'.', {1, 5, 6}}, {paren_identifier, {1, 6, 8}, '::'}, {'(', {1, 8, 9}}, - {decimal, {1, 9, 10}, 3}, + {decimal, {1, 9, 10}, "3"}, {')', {1, 10, 11}}] = tokenize("name.::(3)"). arithmetic_test() -> - [{decimal, {1, 1, 2}, 1}, {dual_op, {1, 3, 4}, '+'}, {decimal, {1, 5, 6}, 2}, {dual_op, {1, 7, 8}, '+'}, {decimal, {1, 9, 10}, 3}] = tokenize("1 + 2 + 3"). + [{decimal, {1, 1, 2}, "1"}, + {dual_op, {1, 3, 4}, '+'}, + {decimal, {1, 5, 6}, "2"}, + {dual_op, {1, 7, 8}, '+'}, + {decimal, {1, 9, 10}, "3"}] = tokenize("1 + 2 + 3"). op_kw_test() -> [{atom, {1, 1, 5}, foo}, {dual_op, {1, 5, 6}, '+'}, {atom, {1, 6, 10}, bar}] = tokenize(":foo+:bar"). @@ -33,12 +37,12 @@ scientific_test() -> {1, "invalid float number ", "1.0e309"} = tokenize_error("1.0e309"). 
hex_bin_octal_test() -> - [{hex, {1, 1, 5}, 255}] = tokenize("0xFF"), - [{hex, {1, 1, 6}, 255}] = tokenize("0xF_F"), - [{octal, {1, 1, 5}, 63}] = tokenize("0o77"), - [{octal, {1, 1, 6}, 63}] = tokenize("0o7_7"), - [{binary, {1, 1, 5}, 3}] = tokenize("0b11"), - [{binary, {1, 1, 6}, 3}] = tokenize("0b1_1"). + [{base_integer, {1, 1, 5}, "0xFF"}] = tokenize("0xFF"), + [{base_integer, {1, 1, 6}, "0xFF"}] = tokenize("0xF_F"), + [{base_integer, {1, 1, 5}, "0o77"}] = tokenize("0o77"), + [{base_integer, {1, 1, 6}, "0o77"}] = tokenize("0o7_7"), + [{base_integer, {1, 1, 5}, "0b11"}] = tokenize("0b11"), + [{base_integer, {1, 1, 6}, "0b11"}] = tokenize("0b1_1"). unquoted_atom_test() -> [{atom, {1, 1, 3}, '+'}] = tokenize(":+"), @@ -68,10 +72,10 @@ kw_test() -> [{kw_identifier_unsafe, {1, 1, 10}, [<<"foo bar">>]}] = tokenize("\"foo bar\": "). integer_test() -> - [{decimal, {1, 1, 4}, 123}] = tokenize("123"), - [{decimal, {1, 1, 4}, 123}, {';', {1, 4, 5}}] = tokenize("123;"), - [{eol, {1, 1, 2}}, {decimal, {3, 1, 4}, 123}] = tokenize("\n\n123"), - [{decimal, {1, 3, 6}, 123}, {decimal, {1, 8, 11}, 234}] = tokenize(" 123 234 "). + [{decimal, {1, 1, 4}, "123"}] = tokenize("123"), + [{decimal, {1, 1, 4}, "123"}, {';', {1, 4, 5}}] = tokenize("123;"), + [{eol, {1, 1, 2}}, {decimal, {3, 1, 4}, "123"}] = tokenize("\n\n123"), + [{decimal, {1, 3, 6}, "123"}, {decimal, {1, 8, 11}, "234"}] = tokenize(" 123 234 "). float_test() -> [{float, {1, 1, 5}, 12.3}] = tokenize("12.3"), @@ -82,9 +86,11 @@ float_test() -> {1, "invalid float number ", OversizedFloat} = tokenize_error(OversizedFloat). 
comments_test() -> - [{decimal, {1, 1, 2}, 1}, {eol, {1, 3, 4}}, {decimal, {2, 1, 2}, 2}] = tokenize("1 # Comment\n2"), - [{decimal, {1, 1, 2}, 1}, {comment, {1, 3, 12}, "# Comment"}, - {eol, {1, 12, 13}}, {decimal, {2, 1, 2}, 2}] = tokenize("1 # Comment\n2", [{preserve_comments, true}]), + [{decimal, {1, 1, 2}, "1"}, {eol, {1, 3, 4}}, {decimal, {2, 1, 2}, "2"}] = tokenize("1 # Comment\n2"), + [{decimal, {1, 1, 2}, "1"}, + {comment, {1, 3, 12}, "# Comment"}, + {eol, {1, 12, 13}}, + {decimal, {2, 1, 2}, "2"}] = tokenize("1 # Comment\n2", [{preserve_comments, true}]), [{comment, {1, 1, 10}, "# Comment"}] = tokenize("# Comment", [{preserve_comments, true}]). identifier_test() -> @@ -118,24 +124,24 @@ newline_test() -> [{identifier, {1, 1, 4}, foo}, {'.', {2, 1, 2}}, {identifier, {2, 2, 5}, bar}] = tokenize("foo\n.bar"), - [{decimal, {1, 1, 2}, 1}, + [{decimal, {1, 1, 2}, "1"}, {two_op, {2, 1, 3}, '++'}, - {decimal, {2, 3, 4}, 2}] = tokenize("1\n++2"). + {decimal, {2, 3, 4}, "2"}] = tokenize("1\n++2"). dot_newline_operator_test() -> [{identifier, {1, 1, 4}, foo}, {'.', {1, 4, 5}}, {identifier, {2, 1, 2}, '+'}, - {decimal, {2, 2, 3}, 1}] = tokenize("foo.\n+1"), + {decimal, {2, 2, 3}, "1"}] = tokenize("foo.\n+1"), [{identifier, {1, 1, 4}, foo}, {'.', {1, 4, 5}}, {identifier, {2, 1, 2}, '+'}, - {decimal, {2, 2, 3}, 1}] = tokenize("foo.#bar\n+1"), + {decimal, {2, 2, 3}, "1"}] = tokenize("foo.#bar\n+1"), [{identifier, {1, 1, 4}, foo}, {'.', {1, 4, 5}}, {comment, {1, 5, 9}, "#bar"}, {identifier, {2, 1, 2}, '+'}, - {decimal, {2, 2, 3}, 1}] = tokenize("foo.#bar\n+1", [{preserve_comments, true}]). + {decimal, {2, 2, 3}, "1"}] = tokenize("foo.#bar\n+1", [{preserve_comments, true}]). aliases_test() -> [{'aliases', {1, 1, 4}, ['Foo']}] = tokenize("Foo"), @@ -158,8 +164,8 @@ addadd_test() -> [{identifier, {1, 1, 2}, x}, {two_op, {1, 3, 5}, '++'}, {identifier, {1, 6, 7}, y}] = tokenize("x ++ y"). 
space_test() -> - [{op_identifier, {1, 1, 4}, foo}, {dual_op, {1, 5, 6}, '-'}, {decimal, {1, 6, 7}, 2}] = tokenize("foo -2"), - [{op_identifier, {1, 1, 4}, foo}, {dual_op, {1, 6, 7}, '-'}, {decimal, {1, 7, 8}, 2}] = tokenize("foo -2"). + [{op_identifier, {1, 1, 4}, foo}, {dual_op, {1, 5, 6}, '-'}, {decimal, {1, 6, 7}, "2"}] = tokenize("foo -2"), + [{op_identifier, {1, 1, 4}, foo}, {dual_op, {1, 6, 7}, '-'}, {decimal, {1, 7, 8}, "2"}] = tokenize("foo -2"). chars_test() -> [{char, {1, 1, 3}, 97}] = tokenize("?a"), @@ -178,17 +184,17 @@ interpolation_test() -> capture_test() -> [{capture_op, {1, 1, 2}, '&'}, {identifier, {1, 2, 4}, '||'}, - {mult_op, {1, 4, 5}, '/'}, - {decimal, {1, 5, 6}, 2}] = tokenize("&||/2"), + {mult_op, {1, 4, 5}, '/'}, + {decimal, {1, 5, 6}, "2"}] = tokenize("&||/2"), [{capture_op, {1, 1, 2}, '&'}, {identifier, {1, 2, 4}, 'or'}, - {mult_op, {1, 4, 5}, '/'}, - {decimal, {1, 5, 6}, 2}] = tokenize("&or/2"), - [{capture_op,{1,1,2},'&'}, - {unary_op,{1,2,5},'not'}, - {decimal,{1,6,7},1}, - {',',{1,7,8}}, - {decimal,{1,9,10},2}] = tokenize("&not 1, 2"). + {mult_op, {1, 4, 5}, '/'}, + {decimal, {1, 5, 6}, "2"}] = tokenize("&or/2"), + [{capture_op, {1, 1, 2}, '&'}, + {unary_op, {1, 2, 5}, 'not'}, + {decimal, {1, 6, 7}, "1"}, + {',', {1, 7, 8}}, + {decimal, {1 ,9, 10}, "2"}] = tokenize("&not 1, 2"). vc_merge_conflict_test() -> {1, "found an unexpected version control marker, please resolve the conflicts: ", "<<<<<<< HEAD"} = |