summary refs log tree commit diff
path: root/erts/emulator/test/bs_utf_SUITE.erl
diff options
context:
space:
mode:
Diffstat (limited to 'erts/emulator/test/bs_utf_SUITE.erl')
-rw-r--r-- erts/emulator/test/bs_utf_SUITE.erl 136
1 files changed, 124 insertions, 12 deletions
diff --git a/erts/emulator/test/bs_utf_SUITE.erl b/erts/emulator/test/bs_utf_SUITE.erl
index 68099c6f39..4a16ef44ab 100644
--- a/erts/emulator/test/bs_utf_SUITE.erl
+++ b/erts/emulator/test/bs_utf_SUITE.erl
@@ -20,11 +20,12 @@
-module(bs_utf_SUITE).
--export([all/0, suite/0,
+-export([all/0, suite/0, init_per_suite/1, end_per_suite/1,
utf8_roundtrip/1,utf16_roundtrip/1,utf32_roundtrip/1,
utf8_illegal_sequences/1,utf16_illegal_sequences/1,
utf32_illegal_sequences/1,
- bad_construction/1]).
+ bad_construction/1,
+ utf8_big_file/1]).
-include_lib("common_test/include/ct.hrl").
@@ -34,26 +35,93 @@ suite() ->
[{ct_hooks,[ts_install_cth]},
{timetrap, {minutes, 6}}].
-all() ->
+all() ->
[utf8_roundtrip, utf16_roundtrip, utf32_roundtrip,
utf8_illegal_sequences, utf16_illegal_sequences,
- utf32_illegal_sequences, bad_construction].
+ utf32_illegal_sequences, bad_construction,
+ utf8_big_file].
+
+init_per_suite(Config) ->
+ %% Make sure that calls to id/1 will hide types.
+ id(Config),
+ Config.
+
+end_per_suite(Config) ->
+ Config.
utf8_roundtrip(Config) when is_list(Config) ->
utf8_roundtrip(0, 16#D7FF),
utf8_roundtrip(16#E000, 16#10FFFF),
ok.
-utf8_roundtrip(First, Last) when First =< Last ->
- Bin = int_to_utf8(First),
+utf8_roundtrip(First, Last) ->
+ %% Hide types.
+ do_utf8_roundtrip(id(First), id(Last)).
+
+do_utf8_roundtrip(First, Last) when First =< Last ->
+ Bin = int_to_utf8(id(First)),
Bin = id(<<First/utf8>>),
Bin = id(<<(id(<<>>))/binary,First/utf8>>),
- Unaligned = id(<<3:2,First/utf8>>),
- <<_:2,Bin/binary>> = Unaligned,
+
+ <<0:7/unit:8,Bin/binary>> = id(<<0:7/unit:8,First/utf8>>),
+
+ %% Here a heap binary and a sub binary will be allocated. If the
+ %% write in the utf8 segment extends beyond the end of the heap binary,
+ %% it will overwrite the header for the sub binary.
+ <<-1:(64-9)/signed,Bin/binary>> = id(<<-1:(64-9),First/utf8>>),
+ <<-1:63/signed,Bin/binary>> = id(<<-1:63,First/utf8>>),
+
+ if
+ is_integer(First) ->
+ Bin = id(<<First/utf8>>)
+ end,
+
+ <<1:1,Bin/binary>> = id(<<1:1,First/utf8>>),
+ <<0:1,Bin/binary>> = id(<<0:1,First/utf8>>),
+ <<3:2,Bin/binary>> = id(<<3:2,First/utf8>>),
+ <<5:3,Bin/binary>> = id(<<5:3,First/utf8>>),
+ <<13:4,Bin/binary>> = id(<<13:4,First/utf8>>),
+ <<21:5,Bin/binary>> = id(<<21:5,First/utf8>>),
+ <<51:6,Bin/binary>> = id(<<51:6,First/utf8>>),
+ <<107:7,Bin/binary>> = id(<<107:7,First/utf8>>),
+
<<First/utf8>> = Bin,
<<First/utf8>> = make_unaligned(Bin),
- utf8_roundtrip(First+1, Last);
-utf8_roundtrip(_, _) -> ok.
+
+ %% Matching of utf8 segments uses different code paths depending
+ %% on the number of bytes available in the binary. Make sure
+ %% we test both code paths.
+ <<First/utf8,0:64>> = id(<<Bin/binary,0:64>>),
+ <<0:3,First/utf8,0:64>> = id(<<0:3,Bin/binary,0:64>>),
+
+ unaligned_match(First),
+
+ Bin = id(<<First/utf8>>),
+ do_utf8_roundtrip(First+1, Last);
+do_utf8_roundtrip(_, _) -> ok.
+
+unaligned_match(Char) ->
+ %% We create a REFC binary so that we can create sub binaries
+ %% and control the contents just beyond the end of the binary.
+ _ = [begin
+ Bin = id(<<0:64/unit:8,0:Offset,Char/utf8>>),
+ <<0:64/unit:8,0:Offset,Char/utf8>> = Bin,
+ unaligned_match(Bin, Offset, 8)
+ end || Offset <- lists:seq(1, 7)],
+ ok.
+
+unaligned_match(_Bin, _Offset, 0) ->
+ ok;
+unaligned_match(Bin, Offset, N) ->
+ Size = bit_size(Bin),
+ <<Shorter:(Size-1)/bits,_:1>> = Bin,
+ try
+ <<0:64/unit:8,0:Offset,Char/utf8>> = Shorter,
+ ct:fail({short_binary_accepted,Shorter,Char})
+ catch
+ error:{badmatch,_} ->
+ unaligned_match(Shorter, Offset, N - 1)
+ end.
utf16_roundtrip(Config) when is_list(Config) ->
Big = fun utf16_big_roundtrip/1,
@@ -149,6 +217,7 @@ fail_range(Char, End) when Char =< End ->
{'EXIT',_} = (catch <<Char/utf8>>),
Bin = int_to_utf8(Char),
fail(Bin),
+ fail(<<Bin/binary,0:64>>),
fail_range(Char+1, End);
fail_range(_, _) -> ok.
@@ -201,24 +270,39 @@ overlong(Char, Last, NumBytes) when Char =< Last ->
overlong(_, _, _) -> ok.
overlong(Char, NumBytes) when NumBytes < 5 ->
- case int_to_utf8(Char, NumBytes) of
+ Bin = int_to_utf8(Char, NumBytes),
+ case <<(int_to_utf8(Char, NumBytes))/binary>> of
<<Char/utf8>>=Bin ->
ct:fail({illegal_encoding_accepted,Bin,Char});
<<OtherChar/utf8>>=Bin ->
ct:fail({illegal_encoding_accepted,Bin,Char,OtherChar});
_ -> ok
end,
+ case <<(int_to_utf8(Char, NumBytes))/binary,0:64>> of
+ <<Char/utf8,0:64>>=Bin2 ->
+ ct:fail({illegal_encoding_accepted,Bin2,Char});
+ <<OtherChar2/utf8,0:64>>=Bin2 ->
+ ct:fail({illegal_encoding_accepted,Bin2,Char,OtherChar2});
+ _ -> ok
+ end,
overlong(Char, NumBytes+1);
overlong(_, _) -> ok.
fail(Bin) ->
fail_1(Bin),
- fail_1(make_unaligned(Bin)).
+ fail_1(make_unaligned(Bin)),
+ BinExt = <<Bin/binary,0:64>>,
+ fail_2(BinExt),
+ fail_2(make_unaligned(BinExt)).
fail_1(<<Char/utf8>>=Bin) ->
ct:fail({illegal_encoding_accepted,Bin,Char});
fail_1(_) -> ok.
+fail_2(<<Char/utf8,0:64>>=Bin) ->
+ ct:fail({illegal_encoding_accepted,Bin,Char});
+fail_2(_) -> ok.
+
utf16_illegal_sequences(Config) when is_list(Config) ->
utf16_fail_range(16#10FFFF+1, 16#10FFFF+512), %Too large.
@@ -295,6 +379,9 @@ bad_construction(Config) when is_list(Config) ->
?FAIL(<<3.14/utf8>>),
?FAIL(<<3.1415/utf16>>),
?FAIL(<<3.1415/utf32>>),
+ {'EXIT',_} = (catch <<(id(3.14))/utf8>>),
+ {'EXIT',_} = (catch <<(id(3.1415))/utf16>>),
+ {'EXIT',_} = (catch <<(id(3.1415))/utf32>>),
?FAIL(<<(-1)/utf8>>),
?FAIL(<<(-1)/utf16>>),
@@ -305,9 +392,23 @@ bad_construction(Config) when is_list(Config) ->
?FAIL(<<16#D800/utf8>>),
?FAIL(<<16#D800/utf16>>),
?FAIL(<<16#D800/utf32>>),
+ {'EXIT',_} = (catch <<(id(16#D800))/utf8>>),
+ {'EXIT',_} = (catch <<(id(16#D800))/utf16>>),
+ {'EXIT',_} = (catch <<(id(16#D800))/utf32>>),
ok.
+utf8_big_file(Config) ->
+ DataDir = get_data_dir(Config),
+ {ok, Bin} = file:read_file(filename:join(DataDir, "NormalizationTest.txt")),
+ List = unicode:characters_to_list(Bin),
+ _ = [begin
+ io:format("~p\n", [Offset]),
+ <<0:Offset, Rest/binary>> = id(<<0:Offset, Bin/binary>>),
+ List = [Char || <<Char/utf8>> <= Rest]
+ end || Offset <- lists:seq(0, 8)],
+ ok.
+
%% This function intentionally allows construction of
%% UTF-8 sequence in illegal ranges.
int_to_utf8(I) when I =< 16#7F ->
@@ -384,4 +485,15 @@ evaluate(Str, Vars) ->
Result
end.
+%% Retrieve the original data directory for cloned modules.
+get_data_dir(Config) ->
+ Data = proplists:get_value(data_dir, Config),
+ Opts = [{return,list}],
+ Suffixes = ["_no_opt_SUITE",
+ "_r25_SUITE"],
+ lists:foldl(fun(Suffix, Acc) ->
+ Opts = [{return,list}],
+ re:replace(Acc, Suffix, "_SUITE", Opts)
+ end, Data, Suffixes).
+
id(I) -> I.