diff options
Diffstat (limited to 'lib/compiler/src/beam_ssa_opt.erl')
-rw-r--r-- | lib/compiler/src/beam_ssa_opt.erl | 482 |
1 files changed, 417 insertions, 65 deletions
diff --git a/lib/compiler/src/beam_ssa_opt.erl b/lib/compiler/src/beam_ssa_opt.erl index e10189afd8..13e4114319 100644 --- a/lib/compiler/src/beam_ssa_opt.erl +++ b/lib/compiler/src/beam_ssa_opt.erl @@ -1,7 +1,7 @@ %% %% %CopyrightBegin% %% -%% Copyright Ericsson AB 2018-2022. All Rights Reserved. +%% Copyright Ericsson AB 2018-2023. All Rights Reserved. %% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. @@ -64,7 +64,9 @@ module(Module, Opts) -> Phases = [{once, Order, prologue_passes(Opts)}, {module, module_passes(Opts)}, {fixpoint, Order, repeated_passes(Opts)}, - {once, Order, epilogue_passes(Opts)}], + {once, Order, early_epilogue_passes(Opts)}, + {module, epilogue_module_passes(Opts)}, + {once, Order, late_epilogue_passes(Opts)}], StMap = run_phases(Phases, StMap0, FuncDb), {ok, finish(Module, StMap)}. @@ -93,7 +95,7 @@ skip_removed(FuncIds, StMap) -> fixpoint(_FuncIds, _Order, _Passes, StMap, FuncDb, 0) -> %% Too many repetitions. Give up and return what we have. {StMap, FuncDb}; -fixpoint(FuncIds0, Order0, Passes, StMap0, FuncDb0, N) -> +fixpoint(FuncIds0, Order0, Passes, StMap0, FuncDb0, N) when is_map(StMap0) -> {StMap, FuncDb} = phase(FuncIds0, Passes, StMap0, FuncDb0), Repeat = changed(FuncIds0, FuncDb0, FuncDb, StMap0, StMap), case sets:is_empty(Repeat) of @@ -243,6 +245,7 @@ prologue_passes(Opts) -> ?PASS(ssa_opt_linearize), ?PASS(ssa_opt_tuple_size), ?PASS(ssa_opt_record), + ?PASS(ssa_opt_update_tuple), ?PASS(ssa_opt_cse), % Helps the first type pass. ?PASS(ssa_opt_live)], % ... passes_1(Ps, Opts). @@ -275,12 +278,25 @@ repeated_passes(Opts) -> %clean up phi nodes. passes_1(Ps, Opts). -epilogue_passes(Opts) -> +epilogue_module_passes(Opts) -> + Ps0 = [{ssa_opt_alias, + fun({StMap, FuncDb}) -> + beam_ssa_alias:opt(StMap, FuncDb) + end}, + {ssa_opt_private_append, + fun({StMap, FuncDb}) -> + beam_ssa_private_append:opt(StMap, FuncDb) + end}], + passes_1(Ps0, Opts). + +early_epilogue_passes(Opts) -> Ps = [?PASS(ssa_opt_type_finish), ?PASS(ssa_opt_float), - ?PASS(ssa_opt_sw), + ?PASS(ssa_opt_sw)], + passes_1(Ps, Opts). - %% Run live one more time to clean up after the previous +late_epilogue_passes(Opts) -> + Ps = [%% Run live one more time to clean up after the previous %% epilogue passes. ?PASS(ssa_opt_live), ?PASS(ssa_opt_bsm), @@ -288,7 +304,9 @@ epilogue_passes(Opts) -> ?PASS(ssa_opt_sink), ?PASS(ssa_opt_blockify), ?PASS(ssa_opt_redundant_br), + ?PASS(ssa_opt_bs_ensure), ?PASS(ssa_opt_merge_blocks), + ?PASS(ssa_opt_try), ?PASS(ssa_opt_get_tuple_element), ?PASS(ssa_opt_tail_literals), ?PASS(ssa_opt_trim_unreachable), @@ -299,14 +317,16 @@ epilogue_passes(Opts) -> passes_1(Ps, Opts0) -> Negations = [{list_to_atom("no_"++atom_to_list(N)),N} || {N,_} <- Ps], - Opts = proplists:substitute_negations(Negations, Opts0), + Expansions = [{no_bs_match,[no_ssa_opt_bs_ensure,no_bs_match]}], + Opts = proplists:normalize(Opts0, [{expand,Expansions}, + {negations,Negations}]), [case proplists:get_value(Name, Opts, true) of true -> P; false -> {NoName,Name} = keyfind(Name, 2, Negations), {NoName,fun(S) -> S end} - end || {Name,_}=P <- Ps]. + end || {Name,_}=P <- Ps]. %% Builds a function information map with basic information about incoming and %% outgoing local calls, as well as whether the function is exported. @@ -393,7 +413,7 @@ fdb_update(Caller, Callee, FuncDb) -> %% Functions where module-level optimization is disabled are added last in %% arbitrary order. -get_call_order_po(StMap, FuncDb) -> +get_call_order_po(StMap, FuncDb) when is_map(FuncDb) -> Order = gco_po(FuncDb), Order ++ sort([K || K <- maps:keys(StMap), not is_map_key(K, FuncDb)]). @@ -491,7 +511,7 @@ ssa_opt_split_blocks({#opt_st{ssa=Blocks0,cnt=Count0}=St, FuncDb}) -> %%% different registers). %%% -ssa_opt_coalesce_phis({#opt_st{ssa=Blocks0}=St, FuncDb}) -> +ssa_opt_coalesce_phis({#opt_st{ssa=Blocks0}=St, FuncDb}) when is_map(Blocks0) -> Ls = beam_ssa:rpo(Blocks0), Blocks = c_phis_1(Ls, Blocks0), {St#opt_st{ssa=Blocks}, FuncDb}. @@ -876,6 +896,73 @@ is_tagged_tuple_4([_|Is], Bool, TagVar) -> is_tagged_tuple_4([], _, _) -> no. %%% +%%% Replaces setelement/3 with the update_tuple psuedo-instruction, and merges +%%% multiple such calls into the same instruction. +%%% +ssa_opt_update_tuple({#opt_st{ssa=Linear0}=St, FuncDb}) -> + {St#opt_st{ssa=update_tuple_opt(Linear0, #{})}, FuncDb}. + +update_tuple_opt([{L, #b_blk{is=Is0}=B} | Bs], SetOps0) -> + {Is, SetOps} = update_tuple_opt_is(Is0, SetOps0, []), + [{L, B#b_blk{is=Is}} | update_tuple_opt(Bs, SetOps)]; +update_tuple_opt([], _SetOps) -> + []. + +update_tuple_opt_is([#b_set{op=call, + dst=Dst, + args=[#b_remote{mod=#b_literal{val=erlang}, + name=#b_literal{val=setelement}}, + #b_literal{val=N}=Index, + Src, + Value]}=I0 | Is], + SetOps0, Acc) when is_integer(N), N >= 1 -> + SetOps1 = SetOps0#{ Dst => {Src, Index, Value} }, + SetOps = maps:remove(Value, SetOps1), + + Args = update_tuple_merge(Src, SetOps, [Index, Value], + sets:new([{version,2}])), + I = I0#b_set{op=update_tuple,dst=Dst,args=Args}, + + update_tuple_opt_is(Is, SetOps, [I | Acc]); +update_tuple_opt_is([#b_set{op=Op}=I | Is], SetOps0, Acc) -> + case {Op, beam_ssa:clobbers_xregs(I)} of + {_, true} -> + %% Merging setelement across stack frames is potentially very + %% expensive as the update list needs to be saved on the stack, so + %% we discard our state whenever we need one. + update_tuple_opt_is(Is, #{}, [I | Acc]); + {{succeeded, _}, false} -> + %% This is a psuedo-op used to link ourselves with our catch block, + %% so it doesn't really count as a use. + update_tuple_opt_is(Is, SetOps0, [I | Acc]); + {_, false} -> + %% It's pointless to merge us with later ops if our result is used + %% and needs to be created anyway. + SetOps = maps:without(beam_ssa:used(I), SetOps0), + update_tuple_opt_is(Is, SetOps, [I | Acc]) + end; +update_tuple_opt_is([], SetOps, Acc) -> + {reverse(Acc), SetOps}. + +update_tuple_merge(Src, SetOps, Updates0, Seen0) -> + %% Note that we're merging in reverse order, so Updates0 contains the + %% updates made *after* this one. + case SetOps of + #{ Src := {Ancestor, Index, Value} } -> + %% Drop redundant updates, which can happen when when a record is + %% updated in several branches and one of them overwrites a + %% previous index. + Updates = case sets:is_element(Index, Seen0) of + false -> [Index, Value | Updates0]; + true -> Updates0 + end, + Seen = sets:add_element(Index, Seen0), + update_tuple_merge(Ancestor, SetOps, Updates, Seen); + #{} -> + [Src | Updates0] + end. + +%%% %%% Common subexpression elimination (CSE). %%% %%% Eliminate repeated evaluation of identical expressions. To avoid @@ -903,7 +990,7 @@ cse_successors([#b_set{op={succeeded,_},args=[Src]},Bif|_], Blk, EsSucc, M0) -> %% We must remove the substitution for Src from the failure branch. #b_blk{last=#b_br{succ=Succ,fail=Fail}} = Blk, M = cse_successors_1([Succ], EsSucc, M0), - EsFail = maps:filter(fun(_, Val) -> Val =/= Src end, EsSucc), + EsFail = #{Var => Val || Var := Val <- EsSucc, Val =/= Src}, cse_successors_1([Fail], EsFail, M); false -> %% There can't be any replacement for Src in EsSucc. No need for @@ -957,6 +1044,30 @@ cse_is([#b_set{op={succeeded,_},dst=Bool,args=[Src]}=I0|Is], Es, Sub0, Acc) -> Sub = Sub0#{Bool=>#b_literal{val=true}}, cse_is(Is, Es, Sub, Acc) end; +cse_is([#b_set{op=put_map,dst=Dst,args=[_Kind,Map|_]}=I0|Is], + Es0, Sub0, Acc) -> + I1 = sub(I0, Sub0), + {ok,ExprKey} = cse_expr(I1), + case Es0 of + #{ExprKey:=PrevPutMap} -> + Sub = Sub0#{Dst=>PrevPutMap}, + cse_is(Is, Es0, Sub, Acc); + #{Map:=PutMap} -> + case combine_put_maps(PutMap, I1) of + none -> + Es1 = Es0#{ExprKey=>Dst}, + Es = cse_add_inferred_exprs(I1, Es1), + cse_is(Is, Es, Sub0, [I1|Acc]); + I -> + Es1 = Es0#{ExprKey=>Dst}, + Es = cse_add_inferred_exprs(I1, Es1), + cse_is(Is, Es, Sub0, [I|Acc]) + end; + #{} -> + Es1 = Es0#{ExprKey=>Dst}, + Es = cse_add_inferred_exprs(I1, Es1), + cse_is(Is, Es, Sub0, [I1|Acc]) + end; cse_is([#b_set{dst=Dst}=I0|Is], Es0, Sub0, Acc) -> I = sub(I0, Sub0), case beam_ssa:clobbers_xregs(I) of @@ -1005,8 +1116,9 @@ cse_add_inferred_exprs(#b_set{op={bif,tl},dst=Tl,args=[List]}, Es) -> Es#{{get_tl,[List]} => Tl}; cse_add_inferred_exprs(#b_set{op={bif,map_get},dst=Value,args=[Key,Map]}, Es) -> Es#{{get_map_element,[Map,Key]} => Value}; -cse_add_inferred_exprs(#b_set{op=put_map,dst=Map,args=[_,_|Args]}, Es) -> - cse_add_map_get(Args, Map, Es); +cse_add_inferred_exprs(#b_set{op=put_map,dst=Map,args=[_,_|Args]}=I, Es0) -> + Es = cse_add_map_get(Args, Map, Es0), + Es#{Map => I}; cse_add_inferred_exprs(_, Es) -> Es. cse_add_map_get([Key,Value|T], Map, Es0) -> @@ -1045,6 +1157,42 @@ cse_suitable(#b_set{anno=Anno,op={bif,Name},args=Args}) -> erl_internal:bool_op(Name, Arity)); cse_suitable(#b_set{}) -> false. +combine_put_maps(#b_set{dst=Prev,args=[#b_literal{val=assoc},Map|Args1]}, + #b_set{args=[#b_literal{val=assoc},Prev|Args2]}=I) -> + case are_map_keys_literals(Args1) andalso are_map_keys_literals(Args2) of + true -> + Args = combine_put_map_args(Args1, Args2), + I#b_set{args=[#b_literal{val=assoc},Map|Args]}; + false -> + none + end; +combine_put_maps(#b_set{}, #b_set{}) -> + none. + +combine_put_map_args(Args1, Args2) -> + Keys = sets:from_list(get_map_keys(Args2), [{version,2}]), + combine_put_map_args_1(Args1, Args2, Keys). + +combine_put_map_args_1([Key,Value|T], Tail, Keys) -> + case sets:is_element(Key, Keys) of + true -> + combine_put_map_args_1(T, Tail, Keys); + false -> + [Key,Value|combine_put_map_args_1(T, Tail, Keys)] + end; +combine_put_map_args_1([], Tail, _Keys) -> Tail. + +get_map_keys([Key,_|T]) -> + [Key|get_map_keys(T)]; +get_map_keys([]) -> []. + +are_map_keys_literals([#b_literal{},_Value|Args]) -> + are_map_keys_literals(Args); +are_map_keys_literals([#b_var{}|_]) -> + false; +are_map_keys_literals([]) -> + true. + %%% %%% Using floating point instructions. %%% @@ -1351,9 +1499,9 @@ live_opt_phis(Is, L, Live0, LiveMap0) -> case [{P,V} || {#b_var{}=V,P} <- PhiArgs] of [_|_]=PhiVars -> PhiLive0 = rel2fam(PhiVars), - PhiLive = [{{L,P},list_set_union(Vs, Live0)} || - {P,Vs} <- PhiLive0], - maps:merge(LiveMap, maps:from_list(PhiLive)); + PhiLive = #{{L,P} => list_set_union(Vs, Live0) || + {P,Vs} <- PhiLive0}, + maps:merge(LiveMap, PhiLive); [] -> %% There were only literals in the phi node(s). LiveMap @@ -1434,7 +1582,14 @@ live_opt_is([], Live, Acc) -> %%% never throw. %%% -ssa_opt_try({#opt_st{ssa=Linear,cnt=Count0}=St, FuncDb}) -> +ssa_opt_try({#opt_st{ssa=SSA0,cnt=Count0}=St, FuncDb}) -> + {Count, SSA} = opt_try(SSA0, Count0), + {St#opt_st{ssa=SSA,cnt=Count}, FuncDb}. + +opt_try(Blocks, Count0) when is_map(Blocks) -> + {Count, Linear} = opt_try(beam_ssa:linearize(Blocks), Count0), + {Count, maps:from_list(Linear)}; +opt_try(Linear, Count0) when is_list(Linear) -> {Count, Shrunk} = shrink_try(Linear, Count0, []), Reduced = reduce_try(Shrunk, []), @@ -1442,7 +1597,7 @@ ssa_opt_try({#opt_st{ssa=Linear,cnt=Count0}=St, FuncDb}) -> EmptySet = sets:new([{version, 2}]), Trimmed = trim_try(Reduced, EmptySet, EmptySet, []), - {St#opt_st{ssa=Trimmed,cnt=Count}, FuncDb}. + {Count, Trimmed}. %% Moves all leading/trailing instructions that cannot fail out of try/catch %% expressions. For example, we can move the tuple constructions `{defg,Arg}` @@ -1514,13 +1669,19 @@ sink_try_is(Is) -> sink_try_is_1([#b_set{op=kill_try_tag}=Kill | Is], Acc) -> [Kill | reverse(Acc, Is)]; sink_try_is_1([I | Is], Acc) -> - case beam_ssa:no_side_effect(I) of + case is_safe_sink_try(I) of true -> sink_try_is_1(Is, [I | Acc]); false -> reverse(Acc, [I | Is]) end; sink_try_is_1([], Acc) -> reverse(Acc). +is_safe_sink_try(#b_set{op=Op}=I) -> + case Op of + bs_extract -> false; + _ -> beam_ssa:no_side_effect(I) + end. + %% Does a strength reduction of try/catch and catch. %% %% In try/catch constructs where the expression is restricted @@ -1702,20 +1863,23 @@ trim_try_is([], _Killed) -> %%% with bs_test_tail. %%% -ssa_opt_bsm({#opt_st{ssa=Linear}=St, FuncDb}) -> - Extracted0 = bsm_extracted(Linear), +ssa_opt_bsm({#opt_st{ssa=Linear0}=St, FuncDb}) -> + Extracted0 = bsm_extracted(Linear0), Extracted = sets:from_list(Extracted0, [{version, 2}]), - {St#opt_st{ssa=bsm_skip(Linear, Extracted)}, FuncDb}. + Linear1 = bsm_skip(Linear0, Extracted), + Linear = bsm_coalesce_skips(Linear1, #{}), + {St#opt_st{ssa=Linear}, FuncDb}. bsm_skip([{L,#b_blk{is=Is0}=Blk}|Bs0], Extracted) -> Bs = bsm_skip(Bs0, Extracted), Is = bsm_skip_is(Is0, Extracted), - coalesce_skips({L,Blk#b_blk{is=Is}}, Bs); + [{L,Blk#b_blk{is=Is}}|Bs]; bsm_skip([], _) -> []. bsm_skip_is([I0|Is], Extracted) -> case I0 of - #b_set{op=bs_match, + #b_set{anno=Anno0, + op=bs_match, dst=Ctx, args=[#b_literal{val=T}=Type,PrevCtx|Args0]} when T =/= float, T =/= string, T =/= skip -> @@ -1727,7 +1891,8 @@ bsm_skip_is([I0|Is], Extracted) -> false -> %% The value is never extracted. Args = [#b_literal{val=skip},PrevCtx,Type|Args0], - I0#b_set{args=Args} + Anno = maps:remove(arg_types, Anno0), + I0#b_set{anno=Anno,args=Args} end, [I|Is]; #b_set{} -> @@ -1744,22 +1909,31 @@ bsm_extracted([{_,#b_blk{is=Is}}|Bs]) -> end; bsm_extracted([]) -> []. +bsm_coalesce_skips([{L,Blk0}|Bs0], Renames0) -> + case coalesce_skips({L,Blk0}, Bs0, Renames0) of + not_possible -> + [{L,Blk0}|bsm_coalesce_skips(Bs0, Renames0)]; + {Bs,Renames} -> + bsm_coalesce_skips(Bs, Renames) + end; +bsm_coalesce_skips([], _Renames) -> []. + coalesce_skips({L,#b_blk{is=[#b_set{op=bs_extract}=Extract|Is0], - last=Last0}=Blk0}, Bs0) -> - case coalesce_skips_is(Is0, Last0, Bs0) of + last=Last0}=Blk0}, Bs0, Renames0) -> + case coalesce_skips_is(Is0, Last0, Bs0, Renames0) of not_possible -> - [{L,Blk0}|Bs0]; - {Is,Last,Bs} -> + not_possible; + {Is,Last,Bs,Renames} -> Blk = Blk0#b_blk{is=[Extract|Is],last=Last}, - [{L,Blk}|Bs] + {[{L,Blk}|Bs],Renames} end; -coalesce_skips({L,#b_blk{is=Is0,last=Last0}=Blk0}, Bs0) -> - case coalesce_skips_is(Is0, Last0, Bs0) of +coalesce_skips({L,#b_blk{is=Is0,last=Last0}=Blk0}, Bs0, Renames0) -> + case coalesce_skips_is(Is0, Last0, Bs0, Renames0) of not_possible -> - [{L,Blk0}|Bs0]; - {Is,Last,Bs} -> + not_possible; + {Is,Last,Bs,Renames} -> Blk = Blk0#b_blk{is=Is,last=Last}, - [{L,Blk}|Bs] + {[{L,Blk}|Bs],Renames} end. coalesce_skips_is([#b_set{op=bs_match, @@ -1767,53 +1941,59 @@ coalesce_skips_is([#b_set{op=bs_match, Ctx0,Type,Flags, #b_literal{val=Size0}, #b_literal{val=Unit0}], - dst=Ctx}=Skip0, + dst=PrevCtx}=Skip0, #b_set{op={succeeded,guard}}], #b_br{succ=L2,fail=Fail}=Br0, - Bs0) when is_integer(Size0) -> + Bs0, + Renames0) when is_integer(Size0) -> case Bs0 of [{L2,#b_blk{is=[#b_set{op=bs_match, dst=SkipDst, - args=[#b_literal{val=skip},Ctx,_,_, + args=[#b_literal{val=skip},PrevCtx,_,_, #b_literal{val=Size1}, #b_literal{val=Unit1}]}, #b_set{op={succeeded,guard}}=Succeeded], last=#b_br{fail=Fail}=Br}}|Bs] when is_integer(Size1) -> + OldCtx = maps:get(Ctx0, Renames0, Ctx0), SkipBits = Size0 * Unit0 + Size1 * Unit1, Skip = Skip0#b_set{dst=SkipDst, - args=[#b_literal{val=skip},Ctx0, + args=[#b_literal{val=skip},OldCtx, Type,Flags, #b_literal{val=SkipBits}, #b_literal{val=1}]}, Is = [Skip,Succeeded], - {Is,Br,Bs}; + Renames = Renames0#{PrevCtx => Ctx0}, + {Is,Br,Bs,Renames}; [{L2,#b_blk{is=[#b_set{op=bs_test_tail, - args=[Ctx,#b_literal{val=TailSkip}]}], + args=[PrevCtx,#b_literal{val=TailSkip}]}], last=#b_br{succ=NextSucc,fail=Fail}}}|Bs] -> + OldCtx = maps:get(Ctx0, Renames0, Ctx0), SkipBits = Size0 * Unit0, TestTail = Skip0#b_set{op=bs_test_tail, - args=[Ctx0,#b_literal{val=SkipBits+TailSkip}]}, + args=[OldCtx,#b_literal{val=SkipBits+TailSkip}]}, Br = Br0#b_br{bool=TestTail#b_set.dst,succ=NextSucc}, Is = [TestTail], - {Is,Br,Bs}; + Renames = Renames0#{PrevCtx => Ctx0}, + {Is,Br,Bs,Renames}; _ -> not_possible end; -coalesce_skips_is(_, _, _) -> +coalesce_skips_is(_, _, _, _) -> not_possible. %%% %%% Short-cutting binary matching instructions. %%% -ssa_opt_bsm_shortcut({#opt_st{ssa=Linear}=St, FuncDb}) -> - Positions = bsm_positions(Linear, #{}), +ssa_opt_bsm_shortcut({#opt_st{ssa=Linear0}=St, FuncDb}) -> + Positions = bsm_positions(Linear0, #{}), case map_size(Positions) of 0 -> %% No binary matching instructions. {St, FuncDb}; _ -> - {St#opt_st{ssa=bsm_shortcut(Linear, Positions)}, FuncDb} + Linear = bsm_shortcut(Linear0, Positions), + ssa_opt_live({St#opt_st{ssa=Linear}, FuncDb}) end. bsm_positions([{L,#b_blk{is=Is,last=Last}}|Bs], PosMap0) -> @@ -1854,20 +2034,36 @@ bsm_update_bits([_,_,_,#b_literal{val=Sz},#b_literal{val=U}], Bits) Bits + Sz*U; bsm_update_bits(_, Bits) -> Bits. -bsm_shortcut([{L,#b_blk{is=Is,last=Last0}=Blk}|Bs], PosMap) -> +bsm_shortcut([{L,#b_blk{is=Is,last=Last0}=Blk}|Bs], PosMap0) -> case {Is,Last0} of {[#b_set{op=bs_match,dst=New,args=[_,Old|_]}, #b_set{op={succeeded,guard},dst=Bool,args=[New]}], #b_br{bool=Bool,fail=Fail}} -> - case PosMap of - #{Old:=Bits,Fail:={TailBits,NextFail}} when Bits > TailBits -> + case PosMap0 of + #{Old := Bits,Fail := {TailBits,NextFail}} when Bits > TailBits -> Last = Last0#b_br{fail=NextFail}, - [{L,Blk#b_blk{last=Last}}|bsm_shortcut(Bs, PosMap)]; + [{L,Blk#b_blk{last=Last}}|bsm_shortcut(Bs, PosMap0)]; + #{} -> + [{L,Blk}|bsm_shortcut(Bs, PosMap0)] + end; + {[#b_set{op=bs_test_tail,dst=Bool,args=[Old,#b_literal{val=TailBits}]}], + #b_br{bool=Bool,succ=Succ,fail=Fail}} -> + case PosMap0 of + #{{bs_test_tail,Old,L} := ActualTailBits} -> + Last1 = if + TailBits =:= ActualTailBits -> + Last0#b_br{fail=Succ}; + true -> + Last0#b_br{succ=Fail} + end, + Last = beam_ssa:normalize(Last1), + [{L,Blk#b_blk{last=Last}}|bsm_shortcut(Bs, PosMap0)]; #{} -> + PosMap = PosMap0#{{bs_test_tail,Old,Succ} => TailBits}, [{L,Blk}|bsm_shortcut(Bs, PosMap)] end; {_,_} -> - [{L,Blk}|bsm_shortcut(Bs, PosMap)] + [{L,Blk}|bsm_shortcut(Bs, PosMap0)] end; bsm_shortcut([], _PosMap) -> []. @@ -1933,18 +2129,20 @@ opt_create_bin_arg(Type, Unit, Flags, #b_literal{val=Val}, #b_literal{val=Size}) when is_integer(Size), is_integer(Unit) -> EffectiveSize = Size * Unit, if - EffectiveSize > 0 -> + EffectiveSize > (1 bsl 24) -> + %% Don't bother converting really huge segments as they might fail + %% with a `system_limit` exception in runtime. Keeping them as-is + %% ensures that the extended error information will be accurate. + %% + %% We'll also reduce the risk of crashing with an unhelpful "out of + %% memory" error message during compilation. + not_possible; + EffectiveSize > 0, EffectiveSize =< (1 bsl 24) -> case {Type,opt_create_bin_endian(Flags)} of {integer,big} when is_integer(Val) -> if EffectiveSize < 64 -> [<<Val:EffectiveSize>>]; - EffectiveSize > 1 bsl 24 -> - %% The binary construction could fail with a - %% system_limit. Don't optimize to ensure that - %% the extended error information will be - %% accurate. - not_possible; true -> opt_bs_put_split_int(Val, EffectiveSize) end; @@ -2322,7 +2520,7 @@ ssa_opt_sink({#opt_st{ssa=Linear}=St, FuncDb}) -> {do_ssa_opt_sink(Defs, St), FuncDb} end. -do_ssa_opt_sink(Defs, #opt_st{ssa=Linear}=St) -> +do_ssa_opt_sink(Defs, #opt_st{ssa=Linear}=St) when is_map(Defs) -> %% Find all the blocks that use variables defined by %% get_tuple_element instructions. Used = used_blocks(Linear, Defs, []), @@ -2415,9 +2613,8 @@ filter_deflocs([{Tuple,DefLoc0}|DLs], Preds, Blocks) -> [{_,{_,First}}|_] = DefLoc0, Paths = find_paths_to_check(DefLoc0, First), WillGC0 = ordsets:from_list([FromTo || {{_,_}=FromTo,_} <- Paths]), - WillGC1 = [{{From,To},will_gc(From, To, Preds, Blocks, true)} || - {From,To} <- WillGC0], - WillGC = maps:from_list(WillGC1), + WillGC = #{{From,To} => will_gc(From, To, Preds, Blocks, true) || + {From,To} <- WillGC0}, %% Separate sinks that will force the reference to the tuple to be %% saved on the stack from sinks that don't force. @@ -2552,7 +2749,7 @@ gc_update_successors(Blk, GC, WillGC) -> %% Return an gbset of block labels for the blocks that are not %% suitable for sinking of get_tuple_element instructions. -unsuitable(Linear, Blocks, Predecessors) -> +unsuitable(Linear, Blocks, Predecessors) when is_map(Blocks), is_map(Predecessors) -> Unsuitable0 = unsuitable_1(Linear), Unsuitable1 = unsuitable_recv(Linear, Blocks, Predecessors), gb_sets:from_list(Unsuitable0 ++ Unsuitable1). @@ -2560,6 +2757,7 @@ unsuitable(Linear, Blocks, Predecessors) -> unsuitable_1([{L,#b_blk{is=[#b_set{op=Op}=I|_]}}|Bs]) -> Unsuitable = case Op of bs_extract -> true; + bs_match -> true; {float,_} -> true; landingpad -> true; _ -> beam_ssa:is_loop_header(I) @@ -2791,6 +2989,7 @@ collect_get_tuple_element(Is, _Src, Acc) -> ssa_opt_unfold_literals({St,FuncDb}) -> #opt_st{ssa=Blocks0,args=Args,anno=Anno} = St, + true = is_map(Blocks0), %Assertion. ParamInfo = maps:get(parameter_info, Anno, #{}), LitMap = collect_arg_literals(Args, ParamInfo, 0, #{}), case map_size(LitMap) of @@ -2823,6 +3022,8 @@ collect_arg_literals([V|Vs], Info, X, Acc0) -> collect_arg_literals([], _Info, _X, Acc) -> Acc. +unfold_literals([?EXCEPTION_BLOCK|Ls], LitMap, SafeMap, Blocks) -> + unfold_literals(Ls, LitMap, SafeMap,Blocks); unfold_literals([L|Ls], LitMap, SafeMap0, Blocks0) -> {Blocks,Safe} = case map_get(L, SafeMap0) of @@ -2955,6 +3156,7 @@ unfold_arg(Expr, _LitMap, _X) -> ssa_opt_tail_literals({St,FuncDb}) -> #opt_st{cnt=Count0,ssa=Blocks0} = St, + true = is_map(Blocks0), %Assertion. {Count, Blocks} = opt_tail_literals(beam_ssa:rpo(Blocks0), Count0, Blocks0), {St#opt_st{cnt=Count,ssa=Blocks},FuncDb}. @@ -3039,7 +3241,7 @@ is_tail_literal(_Is, _Last, _Blocks) -> %%% ret Var %%% -ssa_opt_redundant_br({#opt_st{ssa=Blocks0}=St, FuncDb}) -> +ssa_opt_redundant_br({#opt_st{ssa=Blocks0}=St, FuncDb}) when is_map(Blocks0) -> Blocks = redundant_br(beam_ssa:rpo(Blocks0), Blocks0), {St#opt_st{ssa=Blocks}, FuncDb}. @@ -3123,6 +3325,156 @@ redundant_br_safe_bool(Is, Bool) -> end. %%% +%%% Add the bs_ensure instruction before a sequence of `bs_match` +%%% (SSA) instructions, each having a literal size and the +%%% same failure label. +%%% +%%% This is the first part of building the `bs_match` (BEAM) +%%% instruction that can match multiple segments having the same +%%% failure label. +%%% + +ssa_opt_bs_ensure({#opt_st{ssa=Blocks0,cnt=Count0}=St, FuncDb}) when is_map(Blocks0) -> + RPO = beam_ssa:rpo(Blocks0), + Seen = sets:new([{version,2}]), + {Blocks,Count} = ssa_opt_bs_ensure(RPO, Seen, Count0, Blocks0), + {St#opt_st{ssa=Blocks,cnt=Count}, FuncDb}. + +ssa_opt_bs_ensure([L|Ls], Seen0, Count0, Blocks0) -> + case sets:is_element(L, Seen0) of + true -> + %% This block is already covered by a `bs_ensure` + %% instruction. + ssa_opt_bs_ensure(Ls, Seen0, Count0, Blocks0); + false -> + case is_bs_match_blk(L, Blocks0) of + no -> + ssa_opt_bs_ensure(Ls, Seen0, Count0, Blocks0); + {yes,Size0,#b_br{succ=Succ,fail=Fail}} -> + {Size,Blocks1,Seen} = + ssa_opt_bs_ensure_collect(Succ, Fail, + Blocks0, Seen0, Size0), + Blocks2 = annotate_match(L, Blocks1), + {Blocks,Count} = build_bs_ensure_match(L, Size, Count0, Blocks2), + ssa_opt_bs_ensure(Ls, Seen, Count, Blocks) + end + end; +ssa_opt_bs_ensure([], _Seen, Count, Blocks) -> + {Blocks,Count}. + +ssa_opt_bs_ensure_collect(L, Fail, Blocks0, Seen0, Acc0) -> + case is_bs_match_blk(L, Blocks0) of + no -> + {Acc0,Blocks0,Seen0}; + {yes,Size,#b_br{succ=Succ,fail=Fail}} -> + case update_size(Size, Acc0) of + no -> + {Acc0,Blocks0,Seen0}; + Acc -> + Seen = sets:add_element(L, Seen0), + Blocks = annotate_match(L, Blocks0), + ssa_opt_bs_ensure_collect(Succ, Fail, Blocks, Seen, Acc) + end; + {yes,_,_} -> + {Acc0,Blocks0,Seen0} + end. + +annotate_match(L, Blocks) -> + #b_blk{is=Is0} = Blk0 = map_get(L, Blocks), + Is = [case I of + #b_set{op=bs_match} -> + beam_ssa:add_anno(ensured, true, I); + #b_set{} -> + I + end || I <- Is0], + Blk = Blk0#b_blk{is=Is}, + Blocks#{L := Blk}. + +update_size({{PrevCtx,NewCtx},Size,Unit}, {{_,PrevCtx},Sum,Unit0}) -> + {{PrevCtx,NewCtx},Sum + Size,max(Unit, Unit0)}; +update_size(_, _) -> + no. + +is_bs_match_blk(L, Blocks) -> + Blk = map_get(L, Blocks), + case Blk of + #b_blk{is=Is,last=#b_br{bool=#b_var{}}=Last} -> + case is_bs_match_is(Is) of + no -> + no; + {yes,CtxSizeUnit} -> + {yes,CtxSizeUnit,Last} + end; + #b_blk{} -> + no + end. + +is_bs_match_is([#b_set{op=bs_match,dst=Dst}=I, + #b_set{op={succeeded,guard},args=[Dst]}]) -> + case is_viable_match(I) of + no -> + no; + {yes,{Ctx,Size,Unit}} when Size bsr 24 =:= 0 -> + %% Only include matches of reasonable size. + {yes,{{Ctx,Dst},Size,Unit}}; + {yes,_} -> + %% Too large size. + no + end; +is_bs_match_is([_|Is]) -> + is_bs_match_is(Is); +is_bs_match_is([]) -> no. + +is_viable_match(#b_set{op=bs_match,args=Args}) -> + case Args of + [#b_literal{val=binary},Ctx,_,#b_literal{val=all},#b_literal{val=U}] + when is_integer(U), 1 =< U, U =< 256 -> + {yes,{Ctx,0,U}}; + [#b_literal{val=binary},Ctx,_,#b_literal{val=Size},#b_literal{val=U}] + when is_integer(Size) -> + {yes,{Ctx,Size*U,1}}; + [#b_literal{val=integer},Ctx,_,#b_literal{val=Size},#b_literal{val=U}] + when is_integer(Size) -> + {yes,{Ctx,Size*U,1}}; + [#b_literal{val=skip},Ctx,_,_,#b_literal{val=all},#b_literal{val=U}] -> + {yes,{Ctx,0,U}}; + [#b_literal{val=skip},Ctx,_,_,#b_literal{val=Size},#b_literal{val=U}] + when is_integer(Size) -> + {yes,{Ctx,Size*U,1}}; + [#b_literal{val=string},Ctx,#b_literal{val=Str}] when bit_size(Str) =< 64 -> + {yes,{Ctx,bit_size(Str),1}}; + _ -> + no + end. + +build_bs_ensure_match(L, {_,Size,Unit}, Count0, Blocks0) -> + BsMatchL = Count0, + Count1 = Count0 + 1, + {NewCtx,Count2} = new_var('@context', Count1), + {SuccBool,Count} = new_var('@ssa_bool', Count2), + + BsMatchBlk0 = map_get(L, Blocks0), + + #b_blk{is=MatchIs,last=#b_br{fail=Fail}} = BsMatchBlk0, + {Prefix,Suffix0} = splitwith(fun(#b_set{op=Op}) -> Op =/= bs_match end, MatchIs), + [BsMatch0|Suffix1] = Suffix0, + #b_set{args=[Type,_Ctx|Args]} = BsMatch0, + BsMatch = BsMatch0#b_set{args=[Type,NewCtx|Args]}, + Suffix = [BsMatch|Suffix1], + BsMatchBlk = BsMatchBlk0#b_blk{is=Suffix}, + + #b_set{args=[_,Ctx|_]} = keyfind(bs_match, #b_set.op, MatchIs), + Is = Prefix ++ [#b_set{op=bs_ensure, + dst=NewCtx, + args=[Ctx,#b_literal{val=Size},#b_literal{val=Unit}]}, + #b_set{op={succeeded,guard},dst=SuccBool,args=[NewCtx]}], + Blk = #b_blk{is=Is,last=#b_br{bool=SuccBool,succ=BsMatchL,fail=Fail}}, + + Blocks = Blocks0#{L := Blk, BsMatchL => BsMatchBlk}, + + {Blocks,Count}. + +%%% %%% Common utilities. %%% |