Optimize binary matching for fixed-width segments

Consider this function:

    foo(<<A:6, B:6, C:6, D:6>>) ->
        {A, B, C, D}.

The compiler in Erlang/OTP 25 and earlier would generate the following code for doing the binary matching:

    {test,bs_start_match3,{f,1},1,[{x,0}],{x,1}}.
    {bs_get_position,{x,1},{x,0},2}.
    {test,bs_get_integer2, {f,3}, 2, [{x,1}, {integer,6}, 1, {field_flags,[{anno,[4,{file,"t.erl"}]},unsigned,big]}], {x,2}}.
    {test,bs_get_integer2, {f,3}, 3, [{x,1}, {integer,6}, 1, {field_flags,[{anno,[4,{file,"t.erl"}]},unsigned,big]}], {x,3}}.
    {test,bs_get_integer2, {f,3}, 4, [{x,1}, {integer,6}, 1, {field_flags,[{anno,[4,{file,"t.erl"}]},unsigned,big]}], {x,4}}.
    {test,bs_get_integer2, {f,3}, 5, [{x,1}, {integer,6}, 1, {field_flags,[{anno,[4,{file,"t.erl"}]},unsigned,big]}], {x,5}}.
    {test,bs_test_tail2,{f,3},[{x,1},0]}.

That is, there would be one instruction for each segment being matched. Having separate match instructions for each segment makes it difficult for the JIT to do any serious optimization. Currently, when matching a segment with a size that is not a multiple of 8, the JIT will generate code that calls a helper function. Common sizes such as 8, 16, and 32 are specially optimized with inline code in the x86 JIT and in the non-JIT BEAM VM.

This commit introduces a new `bs_match` instruction for matching of integer and binary segments of fixed size. Here is the generated code for the example:

    {test,bs_start_match3,{f,1},1,[{x,0}],{x,1}}.
    {bs_get_position,{x,1},{x,0},2}.
    {bs_match,{f,3},
              {x,1},
              {commands,[{ensure_exactly,24},
                         {integer,2,{literal,[]},6,1,{x,2}},
                         {integer,3,{literal,[]},6,1,{x,3}},
                         {integer,4,{literal,[]},6,1,{x,4}},
                         {integer,5,{literal,[]},6,1,{x,5}}]}}.

Having only one instruction for the matching allows the JIT to generate faster code. The generated code will do the following:

* Test that the size of the binary being matched is exactly 24 bits.
* Read 24 bits from the binary into a temporary CPU register.
* For each segment, extract the integer from the temporary register by shifting and masking.

Because of the aforementioned optimization for certain common segment sizes, the main part of the Base64 encoding in the `base64` module is currently implemented in the following non-intuitive way:

    encode_binary(<<B1:8, B2:8, B3:8, Ls/bits>>, A) ->
        BB = (B1 bsl 16) bor (B2 bsl 8) bor B3,
        encode_binary(Ls,
                      <<A/bits,(b64e(BB bsr 18)):8,
                        (b64e((BB bsr 12) band 63)):8,
                        (b64e((BB bsr 6) band 63)):8,
                        (b64e(BB band 63)):8>>)

With the new optimization, it is now possible to express the Base64 encoding in a more natural way, which is also faster than before:

    encode_binary(<<B1:6, B2:6, B3:6, B4:6, Ls/bits>>, A) ->
        encode_binary(Ls,
                      <<A/bits,
                        (b64e(B1)):8,
                        (b64e(B2)):8,
                        (b64e(B3)):8,
                        (b64e(B4)):8>>)
author: Björn Gustavsson <bjorn@erlang.org> 2022-08-02 09:58:05 +0200
committer: Björn Gustavsson <bjorn@erlang.org> 2022-09-02 05:52:15 +0200
commit: 4f0ec73674b5c042084b528642185f968f7d9981 (patch)
tree: f4ab47cc625dc7fc47a4e7dfd798dbd4a04693ab /lib/compiler/src/beam_asm.erl
parent: f962bc3636d4f8bd25d77ace27294c274c49a6ca (diff)
download: erlang-4f0ec73674b5c042084b528642185f968f7d9981.tar.gz
1 file changed, 8 insertions, 1 deletion
diff --git a/lib/compiler/src/beam_asm.erl b/lib/compiler/src/beam_asm.erl
index bbbc844576..d959e21ea1 100644
--- a/lib/compiler/src/beam_asm.erl
+++ b/lib/compiler/src/beam_asm.erl
@@ -26,7 +26,7 @@
 
 -export_type([fail/0,label/0,src/0,module_code/0,function_name/0]).
 
--import(lists, [map/2,member/2,keymember/3,duplicate/2,splitwith/2]).
+-import(lists, [append/1,duplicate/2,map/2,member/2,keymember/3,splitwith/2]).
 
 -include("beam_opcodes.hrl").
 -include("beam_asm.hrl").
@@ -481,6 +481,13 @@ encode_arg({extfunc, M, F, A}, Dict0) ->
 encode_arg({list, List}, Dict0) ->
     {L, Dict} = encode_list(List, Dict0, []),
     {[encode(?tag_z, 1), encode(?tag_u, length(List))|L], Dict};
+encode_arg({commands, List0}, Dict) ->
+    List1 = [begin
+                 [H|T] = tuple_to_list(Tuple),
+                 [{atom,H}|T]
+             end || Tuple <- List0],
+    List = append(List1),
+    encode_arg({list, List}, Dict);
 encode_arg({float, Float}, Dict) when is_float(Float) ->
     encode_literal(Float, Dict);
 encode_arg({fr,Fr}, Dict) ->
author	Björn Gustavsson <bjorn@erlang.org>	2022-08-02 09:58:05 +0200
committer	Björn Gustavsson <bjorn@erlang.org>	2022-09-02 05:52:15 +0200
commit	4f0ec73674b5c042084b528642185f968f7d9981 (patch)
tree	f4ab47cc625dc7fc47a4e7dfd798dbd4a04693ab /lib/compiler/src/beam_asm.erl
parent	f962bc3636d4f8bd25d77ace27294c274c49a6ca (diff)
download	erlang-4f0ec73674b5c042084b528642185f968f7d9981.tar.gz