diff options
author | Jan Uhlig <juhlig@hnc-agency.org> | 2022-09-15 12:28:33 +0200 |
---|---|---|
committer | Jan Uhlig <juhlig@hnc-agency.org> | 2022-09-15 12:29:29 +0200 |
commit | 05e61dc7eb568cc5a5db965dcc3534fb6c9aa66d (patch) | |
tree | f99de7b57a84f1da7886eae7fc73221436c4ef37 /lib/stdlib/test/property_test | |
parent | 114725ebe9e58b471fb5710b838499bd4df1099e (diff) | |
download | erlang-05e61dc7eb568cc5a5db965dcc3534fb6c9aa66d.tar.gz |
base64: Add selectable alphabet
RFC 4648 defines two possible alphabets that may be used for
encoding and decoding, the standard alphabet in Section 4 and
an alternative URL and Filename safe alphabet in Section 5.
This commit adds the ability to specify one of the alphabets
for encoding and decoding.
Co-authored-by: Maria Scott <maria-12648430@hnc-agency.org>
Diffstat (limited to 'lib/stdlib/test/property_test')
-rw-r--r-- | lib/stdlib/test/property_test/base64_prop.erl | 375 |
1 files changed, 251 insertions, 124 deletions
diff --git a/lib/stdlib/test/property_test/base64_prop.erl b/lib/stdlib/test/property_test/base64_prop.erl index 6ab7e4c68a..44b1811936 100644 --- a/lib/stdlib/test/property_test/base64_prop.erl +++ b/lib/stdlib/test/property_test/base64_prop.erl @@ -54,96 +54,200 @@ %%% Properties %%% %%%%%%%%%%%%%%%%%% -prop_encode() -> +prop_encode_1() -> ?FORALL( Str, oneof([list(byte()), binary()]), begin Enc = base64:encode(Str), Dec = base64:decode(Enc), - is_b64_binary(Enc) andalso str_equals(Str, Dec) + is_b64_binary(standard, Enc) andalso str_equals(Str, Dec) end ). -prop_encode_to_string() -> +prop_encode_2() -> + ?FORALL( + {Str, Mode}, + {oneof([list(byte()), binary()]), mode()}, + begin + Enc = base64:encode(Str, Mode), + Dec = base64:decode(Enc, Mode), + is_b64_binary(Mode, Enc) andalso str_equals(Str, Dec) + end + ). + +prop_encode_to_string_1() -> ?FORALL( Str, oneof([list(byte()), binary()]), begin Enc = base64:encode_to_string(Str), Dec = base64:decode_to_string(Enc), - is_b64_string(Enc) andalso str_equals(Str, Dec) + is_b64_string(standard, Enc) andalso str_equals(Str, Dec) + end + ). + +prop_encode_to_string_2() -> + ?FORALL( + {Str, Mode}, + {oneof([list(byte()), binary()]), mode()}, + begin + Enc = base64:encode_to_string(Str, Mode), + Dec = base64:decode_to_string(Enc, Mode), + is_b64_string(Mode, Enc) andalso str_equals(Str, Dec) end ). -prop_decode() -> +prop_decode_1() -> ?FORALL( {NormalizedB64, WspedB64}, - wsped_b64(), + wsped_b64(standard), begin Dec = base64:decode(WspedB64), Enc = base64:encode(Dec), - is_binary(Dec) andalso b64_equals(NormalizedB64, Enc) + is_binary(Dec) andalso b64_equals(standard, NormalizedB64, Enc) + end + ). + +prop_decode_2() -> + ?FORALL( + {{NormalizedB64, WspedB64}, Mode}, + ?LET( + Mode, + mode(), + {wsped_b64(Mode), Mode} + ), + begin + Dec = base64:decode(WspedB64, Mode), + Enc = base64:encode(Dec, Mode), + is_binary(Dec) andalso b64_equals(Mode, NormalizedB64, Enc) end ). -prop_decode_malformed() -> - common_decode_malformed(wsped_b64(), fun base64:decode/1). +prop_decode_1_malformed() -> + common_decode_malformed(fun wsped_b64/1, standard, fun(Data, _) -> base64:decode(Data) end). + +prop_decode_2_malformed() -> + common_decode_malformed(fun wsped_b64/1, mode(), fun base64:decode/2). -prop_decode_noisy() -> - common_decode_noisy(fun base64:decode/1). +prop_decode_1_noisy() -> + common_decode_noisy(standard, fun(Data, _) -> base64:decode(Data) end). -prop_decode_to_string() -> +prop_decode_2_noisy() -> + common_decode_noisy(mode(), fun base64:decode/2). + +prop_decode_to_string_1() -> ?FORALL( {NormalizedB64, WspedB64}, - wsped_b64(), + wsped_b64(standard), begin Dec = base64:decode_to_string(WspedB64), Enc = base64:encode(Dec), - is_bytelist(Dec) andalso b64_equals(NormalizedB64, Enc) + is_bytelist(Dec) andalso b64_equals(standard, NormalizedB64, Enc) + end + ). + +prop_decode_to_string_2() -> + ?FORALL( + {{NormalizedB64, WspedB64}, Mode}, + ?LET( + Mode, + mode(), + {wsped_b64(Mode), Mode} + ), + begin + Dec = base64:decode_to_string(WspedB64, Mode), + Enc = base64:encode(Dec, Mode), + is_bytelist(Dec) andalso b64_equals(Mode, NormalizedB64, Enc) end ). -prop_decode_to_string_malformed() -> - common_decode_malformed(wsped_b64(), fun base64:decode_to_string/1). +prop_decode_to_string_1_malformed() -> + common_decode_malformed(fun wsped_b64/1, standard, fun(Data, _) -> base64:decode_to_string(Data) end). + +prop_decode_to_string_2_malformed() -> + common_decode_malformed(fun wsped_b64/1, mode(), fun base64:decode_to_string/2). + +prop_decode_to_string_1_noisy() -> + common_decode_noisy(standard, fun(Data, _) -> base64:decode_to_string(Data) end). -prop_decode_to_string_noisy() -> - common_decode_noisy(fun base64:decode_to_string/1). +prop_decode_to_string_2_noisy() -> + common_decode_noisy(mode(), fun base64:decode_to_string/2). -prop_mime_decode() -> +prop_mime_decode_1() -> ?FORALL( {NormalizedB64, NoisyB64}, - noisy_b64(), + noisy_b64(standard), begin Dec = base64:mime_decode(NoisyB64), Enc = base64:encode(Dec), - is_binary(Dec) andalso b64_equals(NormalizedB64, Enc) + is_binary(Dec) andalso b64_equals(standard, NormalizedB64, Enc) end ). -prop_mime_decode_malformed() -> - common_decode_malformed(noisy_b64(), fun base64:mime_decode/1). +prop_mime_decode_2() -> + ?FORALL( + {{NormalizedB64, NoisyB64}, Mode}, + ?LET( + Mode, + mode(), + {wsped_b64(Mode), Mode} + ), + begin + Dec = base64:mime_decode(NoisyB64, Mode), + Enc = base64:encode(Dec, Mode), + is_binary(Dec) andalso b64_equals(Mode, NormalizedB64, Enc) + end + ). + +prop_mime_decode_1_malformed() -> + common_decode_malformed(fun noisy_b64/1, standard, fun(Data, _) -> base64:mime_decode(Data) end). -prop_mime_decode_to_string() -> +prop_mime_decode_2_malformed() -> + common_decode_malformed(fun noisy_b64/1, mode(), fun base64:mime_decode/2). + +prop_mime_decode_to_string_1() -> ?FORALL( {NormalizedB64, NoisyB64}, - noisy_b64(), + noisy_b64(standard), begin Dec = base64:mime_decode_to_string(NoisyB64), Enc = base64:encode(Dec), - is_bytelist(Dec) andalso b64_equals(NormalizedB64, Enc) + is_bytelist(Dec) andalso b64_equals(standard, NormalizedB64, Enc) end ). -prop_mime_decode_to_string_malformed() -> - common_decode_malformed(noisy_b64(), fun base64:mime_decode_to_string/1). +prop_mime_decode_to_string_2() -> + ?FORALL( + {{NormalizedB64, NoisyB64}, Mode}, + ?LET( + Mode, + mode(), + {wsped_b64(Mode), Mode} + ), + begin + Dec = base64:mime_decode_to_string(NoisyB64, Mode), + Enc = base64:encode(Dec, Mode), + is_bytelist(Dec) andalso b64_equals(Mode, NormalizedB64, Enc) + end + ). -common_decode_noisy(Fn) -> +prop_mime_decode_to_string_1_malformed() -> + common_decode_malformed(fun noisy_b64/1, standard, fun(Data, _) -> base64:mime_decode_to_string(Data) end). + +prop_mime_decode_to_string_2_malformed() -> + common_decode_malformed(fun noisy_b64/1, mode(), fun base64:mime_decode_to_string/2). + +common_decode_noisy(ModeGen, Fn) -> ?FORALL( - {_, NoisyB64}, - ?SUCHTHAT({NormalizedB64, NoisyB64}, noisy_b64(), NormalizedB64 =/= NoisyB64), + {{_, NoisyB64}, Mode}, + ?LET( + Mode, + ModeGen, + {?SUCHTHAT({NormalizedB64, NoisyB64}, noisy_b64(Mode), NormalizedB64 =/= NoisyB64), Mode} + ), try - Fn(NoisyB64) + Fn(NoisyB64, Mode) of _ -> false @@ -153,25 +257,30 @@ common_decode_noisy(Fn) -> end ). -common_decode_malformed(Gen, Fn) -> +common_decode_malformed(DataGen, ModeGen, Fn) -> ?FORALL( - MalformedB64, + {MalformedB64, Mode}, ?LET( - {{NormalizedB64, NoisyB64}, Malformings}, - { - Gen, - oneof( - [ - [b64_char()], - [b64_char(), b64_char()], - [b64_char(), b64_char(), b64_char()] - ] - ) - }, - {NormalizedB64, insert_noise(NoisyB64, Malformings)} + Mode, + ModeGen, + ?LET( + {{NormalizedB64, NoisyB64}, Malformings, InsertFn}, + { + DataGen(Mode), + oneof( + [ + [b64_char(Mode)], + [b64_char(Mode), b64_char(Mode)], + [b64_char(Mode), b64_char(Mode), b64_char(Mode)] + ] + ), + function1(boolean()) + }, + {{NormalizedB64, insert_noise(NoisyB64, Malformings, InsertFn)}, Mode} + ) ), try - Fn(MalformedB64) + Fn(MalformedB64, Mode) of _ -> false @@ -185,16 +294,20 @@ common_decode_malformed(Gen, Fn) -> %%% Generators %%% %%%%%%%%%%%%%%%%%% +%% Generate base64 encoding mode. +mode() -> + oneof([standard, urlsafe]). + %% Generate a single character from the base64 alphabet. -b64_char() -> - oneof(b64_chars()). +b64_char(Mode) -> + oneof(b64_chars(Mode)). %% Generate a string of characters from the base64 alphabet, %% including padding if needed. -b64_string() -> +b64_string(Mode) -> ?LET( {L, Filler}, - {list(b64_char()), b64_char()}, + {list(b64_char(Mode)), b64_char(Mode)}, case length(L) rem 4 of 0 -> L; 1 -> L ++ [Filler, $=, $=]; @@ -205,43 +318,43 @@ b64_string() -> %% Generate a binary of characters from the base64 alphabet, %% including padding if needed. -b64_binary() -> +b64_binary(Mode) -> ?LET( L, - b64_string(), + b64_string(Mode), list_to_binary(L) ). %% Generate a string or binary of characters from the %% base64 alphabet, including padding if needed. -b64() -> - oneof([b64_string(), b64_binary()]). +b64(Mode) -> + oneof([b64_string(Mode), b64_binary(Mode)]). %% Generate a string or binary of characters from the %% base64 alphabet, including padding if needed, with %% whitespaces inserted at random indexes. -wsped_b64() -> +wsped_b64(Mode) -> ?LET( - {B64, Wsps}, - {b64(), list(oneof([$\t, $\r, $\n, $\s]))}, - {B64, insert_noise(B64, Wsps)} + {B64, Wsps, InsertFn}, + {b64(Mode), list(oneof([$\t, $\r, $\n, $\s])), function1(boolean())}, + {B64, insert_noise(B64, Wsps, InsertFn)} ). %% Generate a single character outside of the base64 alphabet. %% As whitespaces are allowed but ignored in base64, this generator %% will produce no whitespaces, either. -non_b64_char() -> - oneof(lists:seq(16#00, 16#FF) -- b64_allowed_chars()). +non_b64_char(Mode) -> + oneof(lists:seq(16#00, 16#FF) -- b64_allowed_chars(Mode)). %% Generate a string or binary of characters from the %% base64 alphabet, including padding if needed, with %% whitespaces and non-base64 ("invalid") characters %% inserted at random indexes. -noisy_b64() -> +noisy_b64(Mode) -> ?LET( - {{B64, WspedB64}, Noise}, - {wsped_b64(), non_empty(list(non_b64_char()))}, - {B64, insert_noise(WspedB64, Noise)} + {{B64, WspedB64}, Noise, InsertFn}, + {wsped_b64(Mode), non_empty(list(non_b64_char(Mode))), function1(boolean())}, + {B64, insert_noise(WspedB64, Noise, InsertFn)} ). %%%%%%%%%%%%%%% @@ -252,81 +365,92 @@ noisy_b64() -> %% "=" is not included, as it is special in that it %% may only appear at the end of a base64 encoded string %% for padding. -b64_chars() -> +b64_chars_common() -> lists:seq($0, $9) ++ lists:seq($a, $z) ++ - lists:seq($A, $Z) ++ - [$+, $/]. + lists:seq($A, $Z). + +b64_chars(standard) -> + b64_chars_common() ++ [$+, $/]; +b64_chars(urlsafe) -> + b64_chars_common() ++ [$-, $_]. %% In addition to the above, the whitespace characters %% HTAB, CR, LF and SP are allowed to appear in a base64 %% encoded string and should be ignored. -b64_allowed_chars() -> - [$\t, $\r, $\n, $\s | b64_chars()]. +b64_allowed_chars(Mode) -> + [$\t, $\r, $\n, $\s | b64_chars(Mode)]. %% Insert the given list of noise characters at random %% places into the given base64 string. -insert_noise(B64, []) -> +insert_noise(B64, Noise, InsertFn) -> + insert_noise(B64, Noise, InsertFn, 0). + +insert_noise(B64, [], _, _) -> B64; -insert_noise([], Noise) -> +insert_noise([], Noise, _, _) -> Noise; -insert_noise(<<>>, Noise) -> +insert_noise(<<>>, Noise, _, _) -> list_to_binary(Noise); -insert_noise([B|Bs] = B64, [N|Ns] = Noise) -> - case rand:uniform(2) of - 1 -> - [B|insert_noise(Bs, Noise)]; - 2 -> - [N|insert_noise(B64, Ns)] +insert_noise([B|Bs] = B64, [N|Ns] = Noise, InsertFn, Idx) -> + case InsertFn(Idx) of + true -> + [B|insert_noise(Bs, Noise, InsertFn, Idx + 1)]; + false -> + [N|insert_noise(B64, Ns, InsertFn, Idx + 1)] end; -insert_noise(<<B, Bs/binary>> = B64, [N|Ns] = Noise) -> - case rand:uniform(2) of - 1 -> - <<B, (insert_noise(Bs, Noise))/binary>>; - 2 -> - <<N, (insert_noise(B64, Ns))/binary>> +insert_noise(<<B, Bs/binary>> = B64, [N|Ns] = Noise, InsertFn, Idx) -> + case InsertFn(Idx) of + true -> + <<B, (insert_noise(Bs, Noise, InsertFn, Idx + 1))/binary>>; + false -> + <<N, (insert_noise(B64, Ns, InsertFn, Idx + 1))/binary>> end. %% Check if the given character is in the base64 alphabet. %% This does not include the padding character "=". -is_b64_char($+) -> +is_b64_char(standard, $+) -> + true; +is_b64_char(standard, $/) -> + true; +is_b64_char(urlsafe, $-) -> true; -is_b64_char($/) -> +is_b64_char(urlsafe, $_) -> true; -is_b64_char(C) when C >= $0, C =< $9 -> +is_b64_char(_, C) when C >= $0, C =< $9 -> true; -is_b64_char(C) when C >= $A, C =< $Z -> +is_b64_char(_, C) when C >= $A, C =< $Z -> true; -is_b64_char(C) when C >= $a, C =< $z -> +is_b64_char(_, C) when C >= $a, C =< $z -> true; -is_b64_char(_) -> +is_b64_char(_, _) -> false. %% Check if the given argument is a base64 binary, %% ie that it consists of quadruplets of characters %% from the base64 alphabet, whereas the last quadruplet %% may be padded with one or two "="s -is_b64_binary(B) -> - is_b64_binary(B, 0). +is_b64_binary(Mode, B) -> + is_b64_binary(Mode, B, 0). -is_b64_binary(<<>>, N) -> +is_b64_binary(_, <<>>, N) -> N rem 4 =:= 0; -is_b64_binary(<<$=>>, N) -> +is_b64_binary(_, <<$=>>, N) -> N rem 4 =:= 3; -is_b64_binary(<<$=, $=>>, N) -> +is_b64_binary(_, <<$=, $=>>, N) -> N rem 4 =:= 2; -is_b64_binary(<<C, More/binary>>, N) -> - case is_b64_char(C) of +is_b64_binary(Mode, <<C, More/binary>>, N) -> + case is_b64_char(Mode, C) of true -> - is_b64_binary(More, N + 1); + is_b64_binary(Mode, More, N + 1); false -> false end. %% Check if the given argument is a base64 string %% (see is_b64_binary/1) -is_b64_string(S) -> - is_b64_binary(list_to_binary(S)). +is_b64_string(Mode, S) -> + is_b64_binary(Mode, list_to_binary(S)). %% Check if the argument is a list of bytes. is_bytelist(L) -> @@ -349,23 +473,23 @@ str_equals(Str1, Str2) when is_binary(Str1), is_binary(Str2) -> %% Assumes that the given arguments are in a normalized form, %% ie that they consist only of characters from the base64 %% alphabet and possible padding ("="). -b64_equals(L, B) when is_list(L) -> - b64_equals(list_to_binary(L), B); -b64_equals(B, L) when is_list(L) -> - b64_equals(B, list_to_binary(L)); -b64_equals(B1, B2) when is_binary(B1), is_binary(B2) -> - b64_equals1(B1, B2). - -b64_equals1(<<Eq:4/bytes>>, <<Eq:4/bytes>>) -> - is_b64_binary(Eq); -b64_equals1(<<Eq:4/bytes, More1/binary>>, <<Eq:4/bytes, More2/binary>>) -> - case lists:all(fun is_b64_char/1, binary_to_list(Eq)) of +b64_equals(Mode, L, B) when is_list(L) -> + b64_equals(Mode, list_to_binary(L), B); +b64_equals(Mode, B, L) when is_list(L) -> + b64_equals(Mode, B, list_to_binary(L)); +b64_equals(Mode, B1, B2) when is_binary(B1), is_binary(B2) -> + b64_equals1(Mode, B1, B2). + +b64_equals1(Mode, <<Eq:4/bytes>>, <<Eq:4/bytes>>) -> + is_b64_binary(Mode, Eq); +b64_equals1(Mode, <<Eq:4/bytes, More1/binary>>, <<Eq:4/bytes, More2/binary>>) -> + case lists:all(fun(C) -> is_b64_char(Mode, C) end, binary_to_list(Eq)) of true -> - b64_equals1(More1, More2); + b64_equals1(Mode, More1, More2); false -> false end; -b64_equals1(<<Eq, B1, $=, $=>>, <<Eq, B2, $=, $=>>) -> +b64_equals1(Mode, <<Eq, B1, $=, $=>>, <<Eq, B2, $=, $=>>) -> %% If the encoded string ends with "==", there exist multiple %% possibilities for the character preceding the "==" as only the %% 3rd and 4th bits of the encoded byte represented by that @@ -374,7 +498,7 @@ b64_equals1(<<Eq, B1, $=, $=>>, <<Eq, B2, $=, $=>>) -> %% For example, all of the encoded strings "QQ==", "QR==", ..., "QZ==" %% decode to the string "A", since all the bytes represented by Q to Z %% are the same in the significant 3rd and 4th bit. - case is_b64_char(Eq) of + case is_b64_char(Mode, Eq) of true -> Normalize = fun (C) when C >= $A, C =< $P -> $A; @@ -383,20 +507,21 @@ b64_equals1(<<Eq, B1, $=, $=>>, <<Eq, B2, $=, $=>>) -> (C) when C >= $g, C =< $v -> $g; (C) when C >= $w, C =< $z -> $w; (C) when C >= $0, C =< $9 -> $w; - ($+) -> $w; - ($/) -> $w + ($+) when Mode =:= standard -> $w; + ($-) when Mode =:= urlsafe -> $w; + ($/) when Mode =:= standard -> $w; + ($_) when Mode =:= urlsafe -> $w end, Normalize(B1) =:= Normalize(B2); false -> false end; -b64_equals1(<<Eq:2/bytes, B1, $=>>, <<Eq:2/bytes, B2, $=>>) -> +b64_equals1(Mode, <<Eq1, Eq2, B1, $=>>, <<Eq1, Eq2, B2, $=>>) -> %% Similar to the above, but with the encoded string ending with a %% single "=" the 3rd to 6th bits of the encoded byte are significant, %% such that, for example, all the encoded strings "QUE=" to "QUH=" %% decode to the same string "AA". - <<Eq1, Eq2>> = Eq, - case is_b64_char(Eq1) andalso is_b64_char(Eq2) of + case is_b64_char(Mode, Eq1) andalso is_b64_char(Mode, Eq2) of true -> Normalize = fun (C) when C >= $A, C =< $D -> $A; @@ -416,14 +541,16 @@ b64_equals1(<<Eq:2/bytes, B1, $=>>, <<Eq:2/bytes, B2, $=>>) -> (C) when C >= $0, C =< $3 -> $0; (C) when C >= $4, C =< $7 -> $4; (C) when C >= $8, C =< $9 -> $8; - ($+) -> $8; - ($/) -> $8 + ($+) when Mode =:= standard -> $8; + ($-) when Mode =:= urlsafe -> $8; + ($/) when Mode =:= standard -> $8; + ($_) when Mode =:= urlsafe -> $8 end, Normalize(B1) =:= Normalize(B2); false -> false end; -b64_equals1(<<>>, <<>>) -> +b64_equals1(_, <<>>, <<>>) -> true; -b64_equals1(_, _) -> +b64_equals1(_, _, _) -> false. |