path: root/erts/emulator/beam/jit/arm/instr_bs.cpp
Diffstat (limited to 'erts/emulator/beam/jit/arm/instr_bs.cpp')
-rw-r--r--  erts/emulator/beam/jit/arm/instr_bs.cpp  2579
1 file changed, 2359 insertions, 220 deletions
diff --git a/erts/emulator/beam/jit/arm/instr_bs.cpp b/erts/emulator/beam/jit/arm/instr_bs.cpp
index 06873cd709..d7e8f70d83 100644
--- a/erts/emulator/beam/jit/arm/instr_bs.cpp
+++ b/erts/emulator/beam/jit/arm/instr_bs.cpp
@@ -1,7 +1,7 @@
/*
* %CopyrightBegin%
*
- * Copyright Ericsson AB 2020-2022. All Rights Reserved.
+ * Copyright Ericsson AB 2020-2023. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -19,6 +19,7 @@
*/
#include "beam_asm.hpp"
+#include <numeric>
extern "C"
{
@@ -29,8 +30,6 @@ extern "C"
/* Clobbers TMP1+TMP2
*
- * If max_size > 0, we jump to the fail label when Size > max_size
- *
* Returns -1 when the field check always fails, 1 if it may fail, and 0 if it
* never fails. */
int BeamModuleAssembler::emit_bs_get_field_size(const ArgSource &Size,
@@ -55,18 +54,40 @@ int BeamModuleAssembler::emit_bs_get_field_size(const ArgSource &Size,
return -1;
} else {
auto size_reg = load_source(Size, TMP2);
+ bool can_fail = true;
+
+ if (always_small(Size)) {
+ auto [min, max] = getClampedRange(Size);
+ can_fail =
+ !(0 <= min && (max >> (SMALL_BITS - ERL_UNIT_BITS)) == 0);
+ }
/* Negating the tag bits lets us guard against non-smalls, negative
* numbers, and overflow with a single `tst` instruction. */
ERTS_CT_ASSERT(_TAG_IMMED1_SMALL == _TAG_IMMED1_MASK);
ASSERT(unit <= 1024);
- a.eor(out, size_reg.reg, imm(_TAG_IMMED1_SMALL));
- a.tst(out, imm(0xFFF0000000000000UL | _TAG_IMMED1_MASK));
+ if (!can_fail) {
+ comment("simplified segment size checks because "
+ "the types are known");
+ }
+
+ if (unit == 1 && !can_fail) {
+ a.lsr(out, size_reg.reg, imm(_TAG_IMMED1_SIZE));
+ } else {
+ a.eor(out, size_reg.reg, imm(_TAG_IMMED1_SMALL));
+ }
+
+ if (can_fail) {
+ a.tst(out, imm(0xFFF0000000000000UL | _TAG_IMMED1_MASK));
+ }
if (unit == 0) {
/* Silly but legal.*/
mov_imm(out, 0);
+ } else if (unit == 1 && !can_fail) {
+ /* The result is already in the out register. */
+ ;
} else if (Support::isPowerOf2(unit)) {
int trailing_bits = Support::ctz<Eterm>(unit);
@@ -88,9 +109,11 @@ int BeamModuleAssembler::emit_bs_get_field_size(const ArgSource &Size,
a.mul(out, out, TMP1);
}
- a.b_ne(fail);
+ if (can_fail) {
+ a.b_ne(fail);
+ }
- return 1;
+ return can_fail;
}
}
@@ -102,7 +125,7 @@ void BeamModuleAssembler::emit_i_bs_init_heap(const ArgWord &Size,
mov_arg(ARG5, Heap);
mov_arg(ARG6, Live);
- emit_enter_runtime<Update::eStack | Update::eHeap | Update::eXRegs |
+ emit_enter_runtime<Update::eHeapAlloc | Update::eXRegs |
Update::eReductions>(Live.get());
a.mov(ARG1, c_p);
@@ -110,7 +133,7 @@ void BeamModuleAssembler::emit_i_bs_init_heap(const ArgWord &Size,
load_erl_bits_state(ARG3);
runtime_call<6>(beam_jit_bs_init);
- emit_leave_runtime<Update::eStack | Update::eHeap | Update::eXRegs |
+ emit_leave_runtime<Update::eHeapAlloc | Update::eXRegs |
Update::eReductions>(Live.get());
mov_arg(Dst, ARG1);
@@ -148,7 +171,7 @@ void BeamModuleAssembler::emit_i_bs_init_fail_heap(const ArgSource &Size,
mov_arg(ARG5, Heap);
mov_arg(ARG6, Live);
- emit_enter_runtime<Update::eStack | Update::eHeap | Update::eXRegs |
+ emit_enter_runtime<Update::eHeapAlloc | Update::eXRegs |
Update::eReductions>(Live.get());
a.mov(ARG1, c_p);
@@ -156,7 +179,7 @@ void BeamModuleAssembler::emit_i_bs_init_fail_heap(const ArgSource &Size,
load_erl_bits_state(ARG3);
runtime_call<6>(beam_jit_bs_init);
- emit_leave_runtime<Update::eStack | Update::eHeap | Update::eXRegs |
+ emit_leave_runtime<Update::eHeapAlloc | Update::eXRegs |
Update::eReductions>(Live.get());
mov_arg(Dst, ARG1);
@@ -207,7 +230,7 @@ void BeamModuleAssembler::emit_i_bs_init_bits_heap(const ArgWord &NumBits,
mov_arg(ARG5, Alloc);
mov_arg(ARG6, Live);
- emit_enter_runtime<Update::eStack | Update::eHeap | Update::eXRegs |
+ emit_enter_runtime<Update::eHeapAlloc | Update::eXRegs |
Update::eReductions>(Live.get());
a.mov(ARG1, c_p);
@@ -215,7 +238,7 @@ void BeamModuleAssembler::emit_i_bs_init_bits_heap(const ArgWord &NumBits,
load_erl_bits_state(ARG3);
runtime_call<6>(beam_jit_bs_init_bits);
- emit_leave_runtime<Update::eStack | Update::eHeap | Update::eXRegs |
+ emit_leave_runtime<Update::eHeapAlloc | Update::eXRegs |
Update::eReductions>(Live.get());
mov_arg(Dst, ARG1);
@@ -248,7 +271,7 @@ void BeamModuleAssembler::emit_i_bs_init_bits_fail_heap(
mov_arg(ARG5, Alloc);
mov_arg(ARG6, Live);
- emit_enter_runtime<Update::eStack | Update::eHeap | Update::eXRegs |
+ emit_enter_runtime<Update::eHeapAlloc | Update::eXRegs |
Update::eReductions>(Live.get());
a.mov(ARG1, c_p);
@@ -256,7 +279,7 @@ void BeamModuleAssembler::emit_i_bs_init_bits_fail_heap(
load_erl_bits_state(ARG3);
runtime_call<6>(beam_jit_bs_init_bits);
- emit_leave_runtime<Update::eStack | Update::eHeap | Update::eXRegs |
+ emit_leave_runtime<Update::eHeapAlloc | Update::eXRegs |
Update::eReductions>(Live.get());
mov_arg(Dst, ARG1);
@@ -541,19 +564,18 @@ void BeamModuleAssembler::emit_i_bs_start_match3(const ArgRegister &Src,
a.bind(is_binary);
{
- /* Src is not guaranteed to be inside the live range, so we need to
- * stash it during GC. */
- emit_gc_test_preserve(ArgVal(ArgVal::Word, ERL_BIN_MATCHSTATE_SIZE(0)),
+ emit_gc_test_preserve(ArgWord(ERL_BIN_MATCHSTATE_SIZE(0)),
Live,
+ Src,
ARG2);
- emit_enter_runtime<Update::eStack | Update::eHeap>(Live.get());
+ emit_enter_runtime<Update::eHeapOnlyAlloc>(Live.get());
a.mov(ARG1, c_p);
/* ARG2 was set above */
runtime_call<2>(erts_bs_start_match_3);
- emit_leave_runtime<Update::eStack | Update::eHeap>(Live.get());
+ emit_leave_runtime<Update::eHeapOnlyAlloc>(Live.get());
a.add(ARG2, ARG1, imm(TAG_PRIMARY_BOXED));
}
@@ -585,9 +607,8 @@ void BeamModuleAssembler::emit_i_bs_match_string(const ArgRegister &Ctx,
a.and_(ARG4, TMP2, imm(7));
/* ARG3 = mb->base + (mb->offset >> 3) */
- a.lsr(TMP2, TMP2, imm(3));
a.ldur(TMP1, emit_boxed_val(ctx_reg.reg, base_offset));
- a.add(ARG3, TMP1, TMP2);
+ a.add(ARG3, TMP1, TMP2, arm::lsr(3));
}
emit_enter_runtime();
@@ -624,77 +645,89 @@ void BeamModuleAssembler::emit_i_bs_get_position(const ArgRegister &Ctx,
flush_var(dst_reg);
}
-void BeamModuleAssembler::emit_i_bs_get_fixed_integer(const ArgRegister &Ctx,
- const ArgLabel &Fail,
- const ArgWord &Live,
- const ArgWord &Flags,
- const ArgWord &Bits,
- const ArgRegister &Dst) {
- auto ctx = load_source(Ctx, TMP1);
- int flags, bits;
-
- flags = Flags.get();
- bits = Bits.get();
+void BeamModuleAssembler::emit_bs_get_integer2(const ArgLabel &Fail,
+ const ArgRegister &Ctx,
+ const ArgWord &Live,
+ const ArgSource &Sz,
+ const ArgWord &Unit,
+ const ArgWord &Flags,
+ const ArgRegister &Dst) {
+ Uint size;
+ Uint flags = Flags.get();
- if (bits >= SMALL_BITS) {
- emit_gc_test_preserve(ArgVal(ArgVal::Word, BIG_NEED_FOR_BITS(bits)),
- Live,
- ctx.reg);
+ if (flags & BSF_NATIVE) {
+ flags &= ~BSF_NATIVE;
+ flags |= BSF_LITTLE;
}
- lea(ARG4, emit_boxed_val(ctx.reg, offsetof(ErlBinMatchState, mb)));
-
- if (bits >= SMALL_BITS) {
- emit_enter_runtime<Update::eHeap>(Live.get());
+ if (Sz.isSmall() && Sz.as<ArgSmall>().getUnsigned() < 8 * sizeof(Uint) &&
+ (size = Sz.as<ArgSmall>().getUnsigned() * Unit.get()) <
+ 8 * sizeof(Uint)) {
+ /* Segment of a fixed size supported by bs_match. */
+ const ArgVal match[] = {ArgAtom(am_ensure_at_least),
+ ArgWord(size),
+ ArgWord(1),
+ ArgAtom(am_integer),
+ Live,
+ ArgWord(flags),
+ ArgWord(size),
+ ArgWord(1),
+ Dst};
+
+ const Span<ArgVal> args(match, sizeof(match) / sizeof(match[0]));
+ emit_i_bs_match(Fail, Ctx, args);
} else {
- emit_enter_runtime(Live.get());
- }
-
- a.mov(ARG1, c_p);
- a.mov(ARG2, bits);
- a.mov(ARG3, flags);
- /* ARG4 set above. */
- runtime_call<4>(erts_bs_get_integer_2);
-
- if (bits >= SMALL_BITS) {
- emit_leave_runtime<Update::eHeap>(Live.get());
- } else {
- emit_leave_runtime(Live.get());
- }
-
- emit_branch_if_not_value(ARG1, resolve_beam_label(Fail, dispUnknown));
- mov_arg(Dst, ARG1);
-}
-
-void BeamModuleAssembler::emit_i_bs_get_integer(const ArgRegister &Ctx,
- const ArgLabel &Fail,
- const ArgWord &Live,
- const ArgWord &FlagsAndUnit,
- const ArgSource &Sz,
- const ArgRegister &Dst) {
- Label fail;
- int unit;
-
- fail = resolve_beam_label(Fail, dispUnknown);
- unit = FlagsAndUnit.get() >> 3;
-
- if (emit_bs_get_field_size(Sz, unit, fail, ARG5) >= 0) {
- mov_arg(ARG3, Ctx);
- mov_arg(ARG4, FlagsAndUnit);
- mov_arg(ARG6, Live);
+ Label fail = resolve_beam_label(Fail, dispUnknown);
+ int unit = Unit.get();
+
+ if (emit_bs_get_field_size(Sz, unit, fail, ARG5) >= 0) {
+            /* This operation can be expensive if a bignum can be
+             * created, because that may trigger a garbage collection. */
+ auto max = std::get<1>(getClampedRange(Sz));
+ bool potentially_expensive =
+ max >= SMALL_BITS || (max * Unit.get()) >= SMALL_BITS;
+
+ mov_arg(ARG3, Ctx);
+ mov_imm(ARG4, flags);
+ if (potentially_expensive) {
+ mov_arg(ARG6, Live);
+ } else {
+#ifdef DEBUG
+ /* Never actually used. */
+ mov_imm(ARG6, 1023);
+#endif
+ }
- emit_enter_runtime<Update::eStack | Update::eHeap | Update::eXRegs |
- Update::eReductions>(Live.get());
+ if (potentially_expensive) {
+ emit_enter_runtime<Update::eHeapAlloc | Update::eXRegs |
+ Update::eReductions>(Live.get());
+ } else {
+ comment("simplified entering runtime because result is always "
+ "small");
+ emit_enter_runtime(Live.get());
+ }
- a.mov(ARG1, c_p);
- load_x_reg_array(ARG2);
- runtime_call<6>(beam_jit_bs_get_integer);
+ a.mov(ARG1, c_p);
+ if (potentially_expensive) {
+ load_x_reg_array(ARG2);
+ } else {
+#ifdef DEBUG
+ /* Never actually used. */
+ mov_imm(ARG2, 0);
+#endif
+ }
+ runtime_call<6>(beam_jit_bs_get_integer);
- emit_leave_runtime<Update::eStack | Update::eHeap | Update::eXRegs |
- Update::eReductions>(Live.get());
+ if (potentially_expensive) {
+ emit_leave_runtime<Update::eHeapAlloc | Update::eXRegs |
+ Update::eReductions>(Live.get());
+ } else {
+ emit_leave_runtime(Live.get());
+ }
- emit_branch_if_not_value(ARG1, fail);
- mov_arg(Dst, ARG1);
+ emit_branch_if_not_value(ARG1, fail);
+ mov_arg(Dst, ARG1);
+ }
}
}
@@ -738,11 +771,7 @@ void BeamModuleAssembler::emit_i_bs_get_binary_all2(const ArgRegister &Ctx,
mov_arg(ARG1, Ctx);
- /* Ctx is not guaranteed to be inside the live range, so we need to stash
- * it during GC. */
- emit_gc_test_preserve(ArgVal(ArgVal::Word, EXTRACT_SUB_BIN_HEAP_NEED),
- Live,
- ARG1);
+ emit_gc_test_preserve(ArgWord(EXTRACT_SUB_BIN_HEAP_NEED), Live, Ctx, ARG1);
/* Make field fetching slightly more compact by pre-loading the match
* buffer into the right argument slot for `erts_bs_get_binary_all_2`. */
@@ -770,13 +799,13 @@ void BeamModuleAssembler::emit_i_bs_get_binary_all2(const ArgRegister &Ctx,
}
}
- emit_enter_runtime<Update::eHeap>(Live.get());
+ emit_enter_runtime<Update::eHeapOnlyAlloc>(Live.get());
a.mov(ARG1, c_p);
/* ARG2 was set above. */
runtime_call<2>(erts_bs_get_binary_all_2);
- emit_leave_runtime<Update::eHeap>(Live.get());
+ emit_leave_runtime<Update::eHeapOnlyAlloc>(Live.get());
mov_arg(Dst, ARG1);
}
@@ -796,11 +825,11 @@ void BeamGlobalAssembler::emit_bs_get_tail_shared() {
a.sub(ARG5, TMP1, ARG4);
emit_enter_runtime_frame();
- emit_enter_runtime<Update::eHeap>();
+ emit_enter_runtime<Update::eHeapOnlyAlloc>();
runtime_call<5>(erts_extract_sub_binary);
- emit_leave_runtime<Update::eHeap>();
+ emit_leave_runtime<Update::eHeapOnlyAlloc>();
emit_leave_runtime_frame();
a.ret(a64::x30);
@@ -811,11 +840,7 @@ void BeamModuleAssembler::emit_bs_get_tail(const ArgRegister &Ctx,
const ArgWord &Live) {
mov_arg(ARG1, Ctx);
- /* Ctx is not guaranteed to be inside the live range, so we need to stash
- * it during GC. */
- emit_gc_test_preserve(ArgVal(ArgVal::Word, EXTRACT_SUB_BIN_HEAP_NEED),
- Live,
- ARG1);
+ emit_gc_test_preserve(ArgWord(EXTRACT_SUB_BIN_HEAP_NEED), Live, Ctx, ARG1);
fragment_call(ga->get_bs_get_tail_shared());
@@ -841,12 +866,34 @@ void BeamModuleAssembler::emit_bs_skip_bits(const ArgLabel &Fail,
}
void BeamModuleAssembler::emit_i_bs_skip_bits2(const ArgRegister &Ctx,
- const ArgRegister &Bits,
+ const ArgRegister &Size,
const ArgLabel &Fail,
const ArgWord &Unit) {
Label fail = resolve_beam_label(Fail, dispUnknown);
- if (emit_bs_get_field_size(Bits, Unit.get(), fail, ARG1) >= 0) {
+ bool can_fail = true;
+
+ if (always_small(Size)) {
+ auto [min, max] = getClampedRange(Size);
+ can_fail = !(0 <= min && (max >> (SMALL_BITS - ERL_UNIT_BITS)) == 0);
+ }
+
+ if (!can_fail && Unit.get() == 1) {
+ comment("simplified skipping because the types are known");
+
+ const int position_offset = offsetof(ErlBinMatchState, mb.offset);
+ const int size_offset = offsetof(ErlBinMatchState, mb.size);
+ auto [ctx, size] = load_sources(Ctx, TMP1, Size, TMP2);
+
+ a.ldur(TMP3, emit_boxed_val(ctx.reg, position_offset));
+ a.ldur(TMP4, emit_boxed_val(ctx.reg, size_offset));
+
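+        /* Add the size to the position; the shifted `lsr` operand strips
+         * the small tag from the size as part of the addition. */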
+ a.add(TMP3, TMP3, size.reg, arm::lsr(_TAG_IMMED1_SIZE));
+ a.cmp(TMP3, TMP4);
+ a.b_hi(resolve_beam_label(Fail, disp1MB));
+
+ a.stur(TMP3, emit_boxed_val(ctx.reg, position_offset));
+ } else if (emit_bs_get_field_size(Size, Unit.get(), fail, ARG1) >= 0) {
emit_bs_skip_bits(Fail, Ctx);
}
}
@@ -875,22 +922,21 @@ void BeamModuleAssembler::emit_i_bs_get_binary2(const ArgRegister &Ctx,
mov_arg(ARG4, Ctx);
- /* Ctx is not guaranteed to be inside the live range, so we need to
- * stash it during GC. */
- emit_gc_test_preserve(ArgVal(ArgVal::Word, EXTRACT_SUB_BIN_HEAP_NEED),
+ emit_gc_test_preserve(ArgWord(EXTRACT_SUB_BIN_HEAP_NEED),
Live,
+ Ctx,
ARG4);
lea(ARG4, emit_boxed_val(ARG4, offsetof(ErlBinMatchState, mb)));
- emit_enter_runtime<Update::eHeap>(Live.get());
+ emit_enter_runtime<Update::eHeapOnlyAlloc>(Live.get());
a.mov(ARG1, c_p);
a.ldr(ARG2, TMP_MEM1q);
mov_imm(ARG3, Flags.get());
runtime_call<4>(erts_bs_get_binary_2);
- emit_leave_runtime<Update::eHeap>(Live.get());
+ emit_leave_runtime<Update::eHeapOnlyAlloc>(Live.get());
emit_branch_if_not_value(ARG1, fail);
@@ -912,20 +958,18 @@ void BeamModuleAssembler::emit_i_bs_get_float2(const ArgRegister &Ctx,
mov_arg(ARG4, Ctx);
- /* Ctx is not guaranteed to be inside the live range, so we need to stash
- * it during GC. */
- emit_gc_test_preserve(ArgWord(FLOAT_SIZE_OBJECT), Live, ARG4);
+ emit_gc_test_preserve(ArgWord(FLOAT_SIZE_OBJECT), Live, Ctx, ARG4);
if (emit_bs_get_field_size(Sz, unit, fail, ARG2) >= 0) {
lea(ARG4, emit_boxed_val(ARG4, offsetof(ErlBinMatchState, mb)));
- emit_enter_runtime<Update::eHeap>(Live.get());
+ emit_enter_runtime<Update::eHeapOnlyAlloc>(Live.get());
a.mov(ARG1, c_p);
mov_imm(ARG3, Flags.get());
runtime_call<4>(erts_bs_get_float_2);
- emit_leave_runtime<Update::eHeap>(Live.get());
+ emit_leave_runtime<Update::eHeapOnlyAlloc>(Live.get());
emit_branch_if_not_value(ARG1, fail);
@@ -983,18 +1027,283 @@ void BeamModuleAssembler::emit_i_bs_put_utf8(const ArgLabel &Fail,
}
}
+/*
+ * ARG1 = pointer to match state
+ * ARG2 = number of bits left in binary (< 32)
+ * ARG3 = position in binary in bits
+ * ARG4 = base pointer to binary data
+ *
+ * See the comment for emit_bs_get_utf8_shared() for details about the
+ * return value.
+ */
+void BeamGlobalAssembler::emit_bs_get_utf8_short_shared() {
+ const int position_offset = offsetof(ErlBinMatchBuffer, offset);
+
+ const arm::Gp match_state = ARG1;
+ const arm::Gp bitdata = ARG2;
+ const arm::Gp bin_position = ARG3;
+ const arm::Gp bin_base = ARG4;
+
+ Label two = a.newLabel();
+ Label three_or_more = a.newLabel();
+ Label four = a.newLabel();
+ Label read_done = a.newLabel();
+ Label ascii = a.newLabel();
+ Label error = a.newLabel();
+
+ /* Calculate the number of bytes remaining in the binary and error
+ * out if less than one. */
+ a.lsr(bitdata, bitdata, imm(3));
+ a.cbz(bitdata, error);
+
+ /* Calculate a byte mask so we can zero out trailing garbage. */
+ a.neg(TMP5, bitdata, arm::lsl(3));
+ mov_imm(TMP4, -1);
+ a.lsl(TMP4, TMP4, TMP5);
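+    /* For example, with two bytes remaining the shift amount is -16,
+     * which `lsl` treats as 48, so TMP4 keeps only the 16 most
+     * significant bits. */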
+
+ /* If the position in the binary is not byte-aligned, we'll need
+ * to read one more byte. */
+ a.ands(TMP1, bin_position, imm(7));
+ a.cinc(bitdata, bitdata, imm(arm::CondCode::kNE));
+
+ /* Set up pointer to the first byte to read. */
+ a.add(TMP2, bin_base, bin_position, arm::lsr(3));
+
+ a.cmp(bitdata, 2);
+ a.b_eq(two);
+ a.b_hi(three_or_more);
+
+ /* Read one byte (always byte-aligned). */
+ a.ldrb(bitdata.w(), arm::Mem(TMP2));
+ a.b(read_done);
+
+ /* Read two bytes. */
+ a.bind(two);
+ a.ldrh(bitdata.w(), arm::Mem(TMP2));
+ a.b(read_done);
+
+ a.bind(three_or_more);
+ a.cmp(bitdata, 3);
+ a.b_ne(four);
+
+ /* Read three bytes. */
+ a.ldrh(bitdata.w(), arm::Mem(TMP2));
+ a.ldrb(TMP3.w(), arm::Mem(TMP2, 2));
+ a.orr(bitdata, bitdata, TMP3, arm::lsl(16));
+ a.b(read_done);
+
+ /* Read four bytes (always unaligned). */
+ a.bind(four);
+ a.ldr(bitdata.w(), arm::Mem(TMP2));
+
+ /* Handle the bytes read. */
+ a.bind(read_done);
+ a.rev64(bitdata, bitdata);
+ a.lsl(bitdata, bitdata, TMP1);
+ a.and_(bitdata, bitdata, TMP4);
+ a.tbz(bitdata, imm(63), ascii);
+ a.b(labels[bs_get_utf8_shared]);
+
+ /* Handle plain old ASCII (code point < 128). */
+ a.bind(ascii);
+ a.add(bin_position, bin_position, imm(8));
+ a.str(bin_position, arm::Mem(match_state, position_offset));
+ a.mov(ARG1, imm(_TAG_IMMED1_SMALL));
+ a.orr(ARG1, ARG1, bitdata, arm::lsr(56 - _TAG_IMMED1_SIZE));
+ a.ret(a64::x30);
+
+ /* Signal error. */
+ a.bind(error);
+ mov_imm(ARG1, 0);
+ a.ret(a64::x30);
+}
+
+/*
+ * ARG1 = pointer to match state
+ * ARG2 = 4 bytes read from the binary in big-endian order
+ * ARG3 = position in binary in bits
+ *
+ * On success, ARG1 holds the extracted code point as a tagged small and
+ * the position in the match state has been updated. On failure, ARG1
+ * contains an invalid term whose tag bits are zero.
+ */
+void BeamGlobalAssembler::emit_bs_get_utf8_shared() {
+ const int position_offset = offsetof(ErlBinMatchBuffer, offset);
+
+ const arm::Gp match_state = ARG1;
+ const arm::Gp bitdata = ARG2;
+ const arm::Gp bin_position = ARG3;
+
+ const arm::Gp byte_count = ARG4;
+
+ const arm::Gp shift = TMP4;
+ const arm::Gp control_mask = TMP5;
+ const arm::Gp error_mask = TMP6;
+
+ /* UTF-8 has the following layout, where 'x' are data bits:
+ *
+ * 1 byte: 0xxxxxxx (not handled by this path)
+ * 2 bytes: 110xxxxx, 10xxxxxx
+ * 3 bytes: 1110xxxx, 10xxxxxx 10xxxxxx
+ * 4 bytes: 11110xxx, 10xxxxxx 10xxxxxx 10xxxxxx
+ *
+ * Note that the number of leading bits is equal to the number of bytes,
+ * which makes it very easy to create masks for extraction and error
+ * checking. */
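+
+    /* For example, U+00E9 is encoded as 0xC3 0xA9 (11000011 10101001).
+     * With those bytes in the most significant bits of bitdata, `cls`
+     * counts a single leading bit equal to the sign bit, so the byte
+     * count becomes 1 + 1 = 2. */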
+
+ /* Calculate the number of bytes. */
+ a.cls(byte_count, bitdata);
+ a.add(byte_count, byte_count, imm(1));
+
+ /* Get rid of the prefix bits. */
+ a.lsl(bitdata, bitdata, byte_count);
+ a.lsr(bitdata, bitdata, byte_count);
+
+ /* Calculate the bit shift now before we start to corrupt the
+ * byte_count. */
+ mov_imm(shift, 64);
+ a.sub(shift, shift, byte_count, arm::lsl(3));
+
+ /* Shift down the value to the least significant part of the word. */
+ a.lsr(bitdata, bitdata, shift);
+
+ /* Matches the '10xxxxxx' components, leaving the header byte alone. */
+ mov_imm(error_mask, 0x00808080ull << 32);
+ a.lsr(error_mask, error_mask, shift);
+
+ /* Construct the control mask '0x00C0C0C0' (already shifted). */
+ a.orr(control_mask, error_mask, error_mask, arm::lsr(1));
+
+ /* Assert that the header bits of each '10xxxxxx' component are correct,
+ * signaling errors by trashing the byte count with an illegal
+ * value (0). */
+ a.and_(TMP3, bitdata, control_mask);
+ a.cmp(TMP3, error_mask);
+
+ a.ubfx(TMP1, bitdata, imm(8), imm(6));
+ a.ubfx(TMP2, bitdata, imm(16), imm(6));
+ a.ubfx(TMP3, bitdata, imm(24), imm(3));
+ a.ubfx(bitdata, bitdata, imm(0), imm(6));
+
+ a.orr(bitdata, bitdata, TMP1, arm::lsl(6));
+ a.orr(bitdata, bitdata, TMP2, arm::lsl(12));
+ a.orr(bitdata, bitdata, TMP3, arm::lsl(18));
+
+ /* Check for too large code point. */
+ mov_imm(TMP1, 0x10FFFF);
+ a.ccmp(bitdata, TMP1, imm(NZCV::kCF), arm::CondCode::kEQ);
+
+ /* Check for the illegal range 16#D800 - 16#DFFF. */
+ a.lsr(TMP1, bitdata, imm(11));
+ a.ccmp(TMP1, imm(0xD800 >> 11), imm(NZCV::kZF), arm::CondCode::kLS);
+ a.csel(byte_count, byte_count, ZERO, imm(arm::CondCode::kNE));
+
+ /* Test for overlong UTF-8 sequence. That can be done by testing
+ * that the bits marked y below are all zero.
+ *
+ * 1 byte: 0xxxxxxx (not handled by this path)
+ * 2 bytes: 110yyyyx, 10xxxxxx
+ * 3 bytes: 1110yyyy, 10yxxxxx 10xxxxxx
+ * 4 bytes: 11110yyy, 10yyxxxx 10xxxxxx 10xxxxxx
+ *
+ * 1 byte: xx'xxxxx
+ * 2 bytes: y'yyyxx'xxxxx
+ * 3 bytes: y'yyyyx'xxxxx'xxxxx
+ * 4 bytes: y'yyyyx'xxxxx'xxxxx'xxxxx
+ *
+ * The y bits can be isolated by shifting down by the number of bits
+ * shown in this table:
+ *
+ * 2: 7 (byte_count * 4 - 1)
+ * 3: 11 (byte_count * 4 - 1)
+ * 4: 16 (byte_count * 4)
+ */
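+
+    /* For example, the overlong two-byte encoding 0xC0 0xAF of U+002F
+     * leaves the value bits 0000'0101111; shifting right by 7 isolates
+     * the four y bits, which are all zero, so the sequence is rejected. */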
+
+ /* Calculate number of bits to shift. */
+ a.lsl(TMP1, byte_count, imm(2));
+ a.cmp(byte_count, imm(4));
+ a.csetm(TMP2, imm(arm::CondCode::kNE));
+ a.add(TMP1, TMP1, TMP2);
+
+ /* Pre-fill the tag bits so that we can clear them on error. */
+ mov_imm(TMP2, _TAG_IMMED1_SMALL);
+
+ /* Now isolate the y bits and compare to zero. This check will
+ * be used in a CCMP further down. */
+ a.lsr(TMP1, bitdata, TMP1);
+ a.cmp(TMP1, 0);
+
+ /* Byte count must be 2, 3, or 4. */
+ a.sub(TMP1, byte_count, imm(2));
+ a.ccmp(TMP1, imm(2), imm(NZCV::kCF), imm(arm::CondCode::kNE));
+
+ /* If we have failed, we set byte_count to zero to ensure that the
+ * position update nops, and set the pre-tagged result to zero so
+ * that we can check for error in module code by testing the tag
+ * bits. */
+ a.csel(byte_count, byte_count, ZERO, imm(arm::CondCode::kLS));
+ a.csel(TMP2, TMP2, ZERO, imm(arm::CondCode::kLS));
+
+ a.add(bin_position, bin_position, byte_count, arm::lsl(3));
+ a.str(bin_position, arm::Mem(match_state, position_offset));
+ a.orr(ARG1, TMP2, bitdata, arm::lsl(_TAG_IMMED1_SIZE));
+
+ a.ret(a64::x30);
+}
+
void BeamModuleAssembler::emit_bs_get_utf8(const ArgRegister &Ctx,
const ArgLabel &Fail) {
- mov_arg(ARG1, Ctx);
- lea(ARG1, emit_boxed_val(ARG1, offsetof(ErlBinMatchState, mb)));
+ const int base_offset = offsetof(ErlBinMatchBuffer, base);
+ const int position_offset = offsetof(ErlBinMatchBuffer, offset);
- emit_enter_runtime();
+ const arm::Gp match_state = ARG1;
+ const arm::Gp bitdata = ARG2;
+ const arm::Gp bin_position = ARG3;
+ const arm::Gp bin_base = ARG4;
+ const arm::Gp bin_size = ARG5;
- runtime_call<1>(erts_bs_get_utf8);
+ auto ctx = load_source(Ctx, ARG6);
- emit_leave_runtime();
+ Label non_ascii = a.newLabel();
+ Label fallback = a.newLabel();
+ Label check = a.newLabel();
+ Label done = a.newLabel();
- emit_branch_if_not_value(ARG1, resolve_beam_label(Fail, dispUnknown));
+ lea(match_state, emit_boxed_val(ctx.reg, offsetof(ErlBinMatchState, mb)));
+ ERTS_CT_ASSERT_FIELD_PAIR(ErlBinMatchBuffer, offset, size);
+ a.ldp(bin_position, bin_size, arm::Mem(ARG1, position_offset));
+ a.ldr(bin_base, arm::Mem(ARG1, base_offset));
+ a.sub(bitdata, bin_size, bin_position);
+ a.cmp(bitdata, imm(32));
+ a.b_lo(fallback);
+
+ emit_read_bits(32, bin_base, bin_position, bitdata);
+ a.tbnz(bitdata, imm(63), non_ascii);
+
+ /* Handle plain old ASCII (code point < 128). */
+ a.add(bin_position, bin_position, imm(8));
+ a.str(bin_position, arm::Mem(ARG1, position_offset));
+ a.mov(ARG1, imm(_TAG_IMMED1_SMALL));
+ a.orr(ARG1, ARG1, bitdata, arm::lsr(56 - _TAG_IMMED1_SIZE));
+ a.b(done);
+
+ /* Handle code point >= 128. */
+ a.bind(non_ascii);
+ fragment_call(ga->get_bs_get_utf8_shared());
+ a.b(check);
+
+    /*
+     * Handle the case where fewer than 4 bytes are available in the binary.
+     */
+
+ a.bind(fallback);
+ fragment_call(ga->get_bs_get_utf8_short_shared());
+
+ a.bind(check);
+ ERTS_CT_ASSERT((_TAG_IMMED1_SMALL & 1) != 0);
+ a.tbz(ARG1, imm(0), resolve_beam_label(Fail, disp32K));
+
+ a.bind(done);
}
void BeamModuleAssembler::emit_i_bs_get_utf8(const ArgRegister &Ctx,
@@ -1291,14 +1600,14 @@ void BeamModuleAssembler::emit_i_bs_append(const ArgLabel &Fail,
mov_arg(ArgXRegister(Live.get()), Bin);
- emit_enter_runtime<Update::eStack | Update::eHeap | Update::eXRegs |
+ emit_enter_runtime<Update::eHeapAlloc | Update::eXRegs |
Update::eReductions>(Live.get() + 1);
a.mov(ARG1, c_p);
load_x_reg_array(ARG2);
runtime_call<6>(erts_bs_append);
- emit_leave_runtime<Update::eStack | Update::eHeap | Update::eXRegs |
+ emit_leave_runtime<Update::eHeapAlloc | Update::eXRegs |
Update::eReductions>(Live.get() + 1);
if (Fail.get() != 0) {
@@ -1355,11 +1664,11 @@ void BeamModuleAssembler::emit_bs_init_writable() {
/* We have an implicit liveness of 0, so we don't need to stash X
* registers. */
- emit_enter_runtime<Update::eReductions | Update::eStack | Update::eHeap>(0);
+ emit_enter_runtime<Update::eReductions | Update::eHeapAlloc>(0);
runtime_call<2>(erts_bs_init_writable);
- emit_leave_runtime<Update::eReductions | Update::eStack | Update::eHeap>(0);
+ emit_leave_runtime<Update::eReductions | Update::eHeapAlloc>(0);
a.mov(XREG0, ARG1);
}
@@ -1367,7 +1676,7 @@ void BeamModuleAssembler::emit_bs_init_writable() {
void BeamGlobalAssembler::emit_bs_create_bin_error_shared() {
a.mov(XREG0, a64::x30);
- emit_enter_runtime<Update::eStack | Update::eHeap>(0);
+ emit_enter_runtime<Update::eHeapAlloc>(0);
/* ARG3 is already set by the caller */
a.mov(ARG2, ARG4);
@@ -1375,7 +1684,7 @@ void BeamGlobalAssembler::emit_bs_create_bin_error_shared() {
a.mov(ARG1, c_p);
runtime_call<4>(beam_jit_bs_construct_fail_info);
- emit_leave_runtime<Update::eStack | Update::eHeap>(0);
+ emit_leave_runtime<Update::eHeapAlloc>(0);
a.mov(ARG4, ZERO);
a.mov(ARG2, XREG0);
@@ -1429,10 +1738,49 @@ void BeamGlobalAssembler::emit_bs_bit_size_shared() {
a.ret(a64::x30);
}
+/*
+ * ARG1 = tagged bignum term
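+ *
+ * On success, ARG1 contains the least significant 64 bits of the bignum's
+ * value and the Z flag is clear; if the term is not a bignum, the Z flag
+ * is set.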
+ */
+void BeamGlobalAssembler::emit_get_sint64_shared() {
+ Label success = a.newLabel();
+ Label fail = a.newLabel();
+
+ emit_is_boxed(fail, ARG1);
+ arm::Gp boxed_ptr = emit_ptr_val(TMP3, ARG1);
+ a.ldr(TMP1, emit_boxed_val(boxed_ptr));
+ a.ldr(TMP2, emit_boxed_val(boxed_ptr, sizeof(Eterm)));
+ a.and_(TMP1, TMP1, imm(_TAG_HEADER_MASK));
+ a.cmp(TMP1, imm(POS_BIG_SUBTAG));
+ a.b_eq(success);
+
+ a.cmp(TMP1, imm(NEG_BIG_SUBTAG));
+ a.b_ne(fail);
+
+ a.neg(TMP2, TMP2);
+
+ a.bind(success);
+ {
+ a.mov(ARG1, TMP2);
+ /* Clear Z flag.
+ *
+ * TMP1 is known to be POS_BIG_SUBTAG or NEG_BIG_SUBTAG at this point.
+ */
+ ERTS_CT_ASSERT(POS_BIG_SUBTAG != 0 && NEG_BIG_SUBTAG != 0);
+ a.tst(TMP1, TMP1);
+ a.ret(a64::x30);
+ }
+
+ a.bind(fail);
+ {
+ a.tst(ZERO, ZERO);
+ a.ret(a64::x30);
+ }
+}
+
struct BscSegment {
BscSegment()
: type(am_false), unit(1), flags(0), src(ArgNil()), size(ArgNil()),
- error_info(0), effectiveSize(-1) {
+ error_info(0), effectiveSize(-1), action(action::DIRECT) {
}
Eterm type;
@@ -1443,19 +1791,443 @@ struct BscSegment {
Uint error_info;
Sint effectiveSize;
+
+    /* Here are the sub-actions used for storing integer segments.
+ *
+ * We use the ACCUMULATE_FIRST and ACCUMULATE actions to shift the
+ * values of segments with known, small sizes (no more than 64 bits)
+ * into an accumulator register.
+ *
+ * When no more segments can be accumulated, the STORE action is
+ * used to store the value of the accumulator into the binary.
+ *
+ * The DIRECT action is used when it is not possible to use the
+ * accumulator (for unknown or too large sizes).
+ */
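+    /*
+     * For example, <<A:16, B:8, C:8>> with big-endian segments of known
+     * size becomes ACCUMULATE_FIRST(A), ACCUMULATE(B), ACCUMULATE(C),
+     * followed by a single STORE that writes the combined 32 bits.
+     */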
+ enum class action { DIRECT, ACCUMULATE_FIRST, ACCUMULATE, STORE } action;
};
+static std::vector<BscSegment> bs_combine_segments(
+ const std::vector<BscSegment> segments) {
+ std::vector<BscSegment> segs;
+
+ for (auto seg : segments) {
+ switch (seg.type) {
+ case am_integer: {
+ if (!(0 < seg.effectiveSize && seg.effectiveSize <= 64)) {
+ /* Unknown or too large size. Handle using the default
+ * DIRECT action. */
+ segs.push_back(seg);
+ continue;
+ }
+
+ if (seg.flags & BSF_LITTLE || segs.size() == 0 ||
+ segs.back().action == BscSegment::action::DIRECT) {
+ /* There are no previous compatible ACCUMULATE / STORE
+ * actions. Create the first ones. */
+ seg.action = BscSegment::action::ACCUMULATE_FIRST;
+ segs.push_back(seg);
+ seg.action = BscSegment::action::STORE;
+ segs.push_back(seg);
+ continue;
+ }
+
+ auto prev = segs.back();
+ if (prev.flags & BSF_LITTLE) {
+ /* Little-endian segments cannot be combined with other
+ * segments. Create new ACCUMULATE_FIRST / STORE actions. */
+ seg.action = BscSegment::action::ACCUMULATE_FIRST;
+ segs.push_back(seg);
+ seg.action = BscSegment::action::STORE;
+ segs.push_back(seg);
+ continue;
+ }
+
+ /* The current segment is compatible with the previous
+ * segment. Try combining them. */
+ if (prev.effectiveSize + seg.effectiveSize <= 64) {
+ /* The combined values of the segments fit in the
+ * accumulator. Insert an ACCUMULATE action for the
+ * current segment before the pre-existing STORE
+ * action. */
+ segs.pop_back();
+ prev.effectiveSize += seg.effectiveSize;
+ seg.action = BscSegment::action::ACCUMULATE;
+ segs.push_back(seg);
+ segs.push_back(prev);
+ } else {
+ /* The size exceeds 64 bits. Can't combine. */
+ seg.action = BscSegment::action::ACCUMULATE_FIRST;
+ segs.push_back(seg);
+ seg.action = BscSegment::action::STORE;
+ segs.push_back(seg);
+ }
+ break;
+ }
+ default:
+ segs.push_back(seg);
+ break;
+ }
+ }
+ return segs;
+}
+
+/*
+ * In:
+ * bin_offset = register to store the bit offset into the binary
+ * bit_offset = current bit offset into binary, or -1 if unknown
+ * size = size of segment to be constructed
+ * (ignored if size_reg is valid register)
+ * size_reg = if a valid register, it contains the size of
+ * the segment to be constructed
+ *
+ * Out:
+ * bin_offset register = if bit_offset is not byte aligned, the bit
+ * offset into the binary
+ * TMP1 = pointer to the current byte in the binary
+ *
+ * Preserves all other ARG* registers.
+ */
+void BeamModuleAssembler::update_bin_state(arm::Gp bin_offset,
+ Sint bit_offset,
+ Sint size,
+ arm::Gp size_reg) {
+ int cur_bin_offset = offsetof(ErtsSchedulerRegisters,
+ aux_regs.d.erl_bits_state.erts_current_bin_);
+ arm::Mem mem_bin_base = arm::Mem(scheduler_registers, cur_bin_offset);
+ arm::Mem mem_bin_offset =
+ arm::Mem(scheduler_registers, cur_bin_offset + sizeof(Eterm));
+
+ if (bit_offset % 8 != 0) {
+ /* The bit offset is unknown or not byte-aligned. */
+ ERTS_CT_ASSERT_FIELD_PAIR(struct erl_bits_state,
+ erts_current_bin_,
+ erts_bin_offset_);
+ a.ldp(TMP2, bin_offset, mem_bin_base);
+
+ if (size_reg.isValid()) {
+ a.add(TMP1, bin_offset, size_reg);
+ } else {
+ add(TMP1, bin_offset, size);
+ }
+ a.str(TMP1, mem_bin_offset);
+
+ a.add(TMP1, TMP2, bin_offset, arm::lsr(3));
+ } else {
+ comment("optimized updating of binary construction state");
+ ASSERT(size >= 0 || size_reg.isValid());
+ ASSERT(bit_offset % 8 == 0);
+ a.ldr(TMP1, mem_bin_base);
+ if (size_reg.isValid()) {
+ if (bit_offset == 0) {
+ a.str(size_reg, mem_bin_offset);
+ } else {
+ add(TMP2, size_reg, bit_offset);
+ a.str(TMP2, mem_bin_offset);
+ }
+ } else {
+ mov_imm(TMP2, bit_offset + size);
+ a.str(TMP2, mem_bin_offset);
+ }
+ if (bit_offset != 0) {
+ add(TMP1, TMP1, bit_offset >> 3);
+ }
+ }
+}
+
+/*
+ * The size of the segment is assumed to be in ARG3.
+ */
+void BeamModuleAssembler::set_zero(Sint effectiveSize) {
+ Label store_units = a.newLabel();
+ Label less_than_a_store_unit = a.newLabel();
+ Sint store_unit = 1;
+
+ update_bin_state(ARG2, -1, -1, ARG3);
+
+ if (effectiveSize >= 256) {
+        /* Store four 64-bit machine words when the size is
+ * known and at least 256 bits. */
+ store_unit = 4;
+ a.movi(a64::d31, 0);
+ } else if (effectiveSize >= 128) {
+        /* Store two 64-bit machine words when the size is
+ * known and at least 128 bits. */
+ store_unit = 2;
+ }
+
+ if (effectiveSize < Sint(store_unit * 8 * sizeof(Eterm))) {
+        /* The size is either not known or smaller than a store unit. */
+ a.cmp(ARG3, imm(store_unit * 8 * sizeof(Eterm)));
+ a.b_lt(less_than_a_store_unit);
+ }
+
+ a.bind(store_units);
+ if (store_unit == 4) {
+ a.stp(a64::q31, a64::q31, arm::Mem(TMP1).post(sizeof(Eterm[4])));
+ } else if (store_unit == 2) {
+ a.stp(ZERO, ZERO, arm::Mem(TMP1).post(sizeof(Eterm[2])));
+ } else {
+ a.str(ZERO, arm::Mem(TMP1).post(sizeof(Eterm)));
+ }
+ a.sub(ARG3, ARG3, imm(store_unit * 8 * sizeof(Eterm)));
+
+ a.cmp(ARG3, imm(store_unit * 8 * sizeof(Eterm)));
+ a.b_ge(store_units);
+
+ a.bind(less_than_a_store_unit);
+ if (effectiveSize < 0) {
+ /* Unknown size. */
+ Label byte_loop = a.newLabel();
+ Label done = a.newLabel();
+
+        ASSERT(store_unit == 1);
+
+ a.cbz(ARG3, done);
+
+ a.bind(byte_loop);
+ a.strb(ZERO.w(), arm::Mem(TMP1).post(1));
+ a.subs(ARG3, ARG3, imm(8));
+ a.b_gt(byte_loop);
+
+ a.bind(done);
+ } else if (effectiveSize % (store_unit * 8 * sizeof(Eterm)) != 0) {
+ /* The size is known, and we know that there are less than
+ * 256 bits to initialize. */
+ if (store_unit == 4 && (effectiveSize & 255) >= 128) {
+ a.stp(ZERO, ZERO, arm::Mem(TMP1).post(16));
+ }
+
+ if ((effectiveSize & 127) >= 64) {
+ a.str(ZERO, arm::Mem(TMP1).post(8));
+ }
+
+ if ((effectiveSize & 63) >= 32) {
+ a.str(ZERO.w(), arm::Mem(TMP1).post(4));
+ }
+
+ if ((effectiveSize & 31) >= 16) {
+ a.strh(ZERO.w(), arm::Mem(TMP1).post(2));
+ }
+
+ if ((effectiveSize & 15) >= 8) {
+ a.strb(ZERO.w(), arm::Mem(TMP1).post(1));
+ }
+
+ if ((effectiveSize & 7) > 0) {
+ a.strb(ZERO.w(), arm::Mem(TMP1));
+ }
+ }
+}
+
+/*
+ * In:
+ *
+ * ARG1 = valid unicode code point (>= 0x80) to encode
+ *
+ * Out:
+ *
+ * ARG1 = the code point encoded in UTF-8.
+ * ARG4 = number of bits of result (16, 24, or 32)
+ *
+ * Preserves other ARG* registers, clobbers TMP* registers
+ */
+void BeamGlobalAssembler::emit_construct_utf8_shared() {
+ Label more_than_two_bytes = a.newLabel();
+ Label four_bytes = a.newLabel();
+ const arm::Gp value = ARG1;
+ const arm::Gp num_bits = ARG4;
+
+ a.cmp(value, imm(0x800));
+ a.b_hs(more_than_two_bytes);
+
+ /* Encode Unicode code point in two bytes. */
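+    /* For example, U+00E9: ((0xE9 & 0x3F) << 8) | (0xE9 >> 6) | 0x80C0 =
+     * 0xA9C3, which is the UTF-8 sequence 0xC3 0xA9 with the first byte
+     * in the least significant position, ready to be stored in memory
+     * order. */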
+ a.ubfiz(TMP1, value, imm(8), imm(6));
+ mov_imm(TMP2, 0x80c0);
+ a.orr(TMP1, TMP1, value, arm::lsr(6));
+ mov_imm(num_bits, 16);
+ a.orr(value, TMP1, TMP2);
+ a.ret(a64::x30);
+
+ /* Test whether the value should be encoded in four bytes. */
+ a.bind(more_than_two_bytes);
+ a.lsr(TMP1, value, imm(16));
+ a.cbnz(TMP1, four_bytes);
+
+ /* Encode Unicode code point in three bytes. */
+ a.lsl(TMP1, value, imm(2));
+ a.ubfiz(TMP2, value, imm(16), imm(6));
+ a.and_(TMP1, TMP1, imm(0x3f00));
+ mov_imm(num_bits, 24);
+ a.orr(TMP1, TMP1, value, arm::lsr(12));
+ a.orr(TMP1, TMP1, TMP2);
+ mov_imm(TMP2, 0x8080e0);
+ a.orr(value, TMP1, TMP2);
+ a.ret(a64::x30);
+
+ /* Encode Unicode code point in four bytes. */
+ a.bind(four_bytes);
+ a.lsl(TMP1, value, imm(10));
+ a.lsr(TMP2, value, imm(4));
+ a.and_(TMP1, TMP1, imm(0x3f0000));
+ a.and_(TMP2, TMP2, imm(0x3f00));
+ a.bfxil(TMP1, value, imm(18), imm(14));
+ mov_imm(num_bits, 32);
+ a.bfi(TMP1, value, imm(24), imm(6));
+ a.orr(TMP1, TMP1, TMP2);
+ mov_imm(TMP2, 0x808080f0);
+ a.orr(value, TMP1, TMP2);
+ a.ret(a64::x30);
+}
+
+void BeamModuleAssembler::emit_construct_utf8(const ArgVal &Src,
+ Sint bit_offset,
+ bool is_byte_aligned) {
+ Label prepare_store = a.newLabel();
+ Label store = a.newLabel();
+ Label next = a.newLabel();
+
+ comment("construct utf8 segment");
+ auto src = load_source(Src, ARG1);
+
+ a.lsr(ARG1, src.reg, imm(_TAG_IMMED1_SIZE));
+ mov_imm(ARG4, 8);
+ a.cmp(ARG1, imm(0x80));
+ a.b_lo(prepare_store);
+
+ fragment_call(ga->get_construct_utf8_shared());
+
+ a.bind(prepare_store);
+ arm::Gp bin_offset = ARG3;
+ update_bin_state(bin_offset, bit_offset, -1, ARG4);
+
+ if (!is_byte_aligned) {
+ /* Not known to be byte-aligned. Must test alignment. */
+ a.ands(TMP2, bin_offset, imm(7));
+ a.b_eq(store);
+
+ /* We must combine the last partial byte with the UTF-8
+ * encoded code point. */
+ a.ldrb(TMP5.w(), arm::Mem(TMP1));
+
+ a.rev64(TMP4, ARG1);
+ a.lsr(TMP4, TMP4, TMP2);
+ a.rev64(TMP4, TMP4);
+
+ a.lsl(TMP5, TMP5, TMP2);
+ a.and_(TMP5, TMP5, imm(~0xff));
+ a.lsr(TMP5, TMP5, TMP2);
+
+ a.orr(ARG1, TMP4, TMP5);
+
+ a.add(ARG4, ARG4, imm(8));
+ }
+
+ a.bind(store);
+ if (bit_offset % (4 * 8) == 0) {
+ /* This segment is aligned on a 4-byte boundary. This implies
+ * that a 4-byte write will be inside the allocated binary. */
+ a.str(ARG1.w(), arm::Mem(TMP1));
+ } else {
+ Label do_store_1 = a.newLabel();
+ Label do_store_2 = a.newLabel();
+
+ /* Unsuitable or unknown alignment. We must be careful not
+         * to write beyond the allocated end of the binary. */
+ a.cmp(ARG4, imm(8));
+ a.b_ne(do_store_1);
+
+ a.strb(ARG1.w(), arm::Mem(TMP1));
+ a.b(next);
+
+ a.bind(do_store_1);
+ a.cmp(ARG4, imm(24));
+ a.b_hi(do_store_2);
+
+ a.strh(ARG1.w(), arm::Mem(TMP1));
+ a.cmp(ARG4, imm(16));
+ a.b_eq(next);
+
+ a.lsr(ARG1, ARG1, imm(16));
+ a.strb(ARG1.w(), arm::Mem(TMP1, 2));
+ a.b(next);
+
+ a.bind(do_store_2);
+ a.str(ARG1.w(), arm::Mem(TMP1));
+
+ if (!is_byte_aligned) {
+ a.cmp(ARG4, imm(32));
+ a.b_eq(next);
+
+ a.lsr(ARG1, ARG1, imm(32));
+ a.strb(ARG1.w(), arm::Mem(TMP1, 4));
+ }
+ }
+
+ a.bind(next);
+}
+
+/*
+ * In:
+ * TMP1 = pointer to current byte
+ * ARG3 = bit offset
+ * ARG4 = number of bits to write
+ * ARG8 = data to write
+ */
+void BeamGlobalAssembler::emit_store_unaligned() {
+ Label loop = a.newLabel();
+ Label done = a.newLabel();
+ const arm::Gp left_bit_offset = ARG3;
+ const arm::Gp right_bit_offset = TMP6;
+ const arm::Gp num_bits = ARG4;
+ const arm::Gp bitdata = ARG8;
+
+ a.ldrb(TMP5.w(), arm::Mem(TMP1));
+
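+    /* Merge the partial byte already in memory with the first bits of the
+     * data: the byte in memory keeps its left_bit_offset most significant
+     * bits and the data supplies the remaining bits. */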
+ a.and_(TMP4, bitdata, imm(0xff));
+ a.lsr(TMP4, TMP4, left_bit_offset);
+
+ a.lsl(TMP5, TMP5, left_bit_offset);
+ a.and_(TMP5, TMP5, imm(~0xff));
+ a.lsr(TMP5, TMP5, left_bit_offset);
+
+ a.orr(TMP5, TMP4, TMP5);
+
+ a.strb(TMP5.w(), arm::Mem(TMP1).post(1));
+
+ mov_imm(right_bit_offset, 8);
+ a.sub(right_bit_offset, right_bit_offset, left_bit_offset);
+
+ a.rev64(bitdata, bitdata);
+ a.lsl(bitdata, bitdata, right_bit_offset);
+
+ a.subs(num_bits, num_bits, right_bit_offset);
+ a.b_le(done);
+
+ a.bind(loop);
+ a.ror(bitdata, bitdata, imm(56));
+ a.strb(bitdata.w(), arm::Mem(TMP1).post(1));
+ a.subs(num_bits, num_bits, imm(8));
+ a.b_gt(loop);
+
+ a.bind(done);
+ a.ret(a64::x30);
+}
+
void BeamModuleAssembler::emit_i_bs_create_bin(const ArgLabel &Fail,
const ArgWord &Alloc,
const ArgWord &Live0,
const ArgRegister &Dst,
const Span<ArgVal> &args) {
Uint num_bits = 0;
+ Uint estimated_num_bits = 0;
std::size_t n = args.size();
std::vector<BscSegment> segments;
- Label error;
+ Label error; /* Intentionally uninitialized */
ArgWord Live = Live0;
arm::Gp sizeReg;
+ Sint allocated_size = -1;
+ bool need_error_handler = false;
/*
* Collect information about each segment and calculate sizes of
@@ -1501,17 +2273,67 @@ void BeamModuleAssembler::emit_i_bs_create_bin(const ArgLabel &Fail,
seg.error_info = beam_jit_set_bsc_segment_op(bsc_segment, bsc_op);
/*
+ * Test whether we can omit the code for the error handler.
+ */
+ switch (seg.type) {
+ case am_append:
+ if (!(exact_type<BeamTypeId::Bitstring>(seg.src) &&
+ std::gcd(seg.unit, getSizeUnit(seg.src)) == seg.unit)) {
+ need_error_handler = true;
+ }
+ break;
+ case am_binary:
+ if (!(seg.size.isAtom() && seg.size.as<ArgAtom>().get() == am_all &&
+ exact_type<BeamTypeId::Bitstring>(seg.src) &&
+ std::gcd(seg.unit, getSizeUnit(seg.src)) == seg.unit)) {
+ need_error_handler = true;
+ }
+ break;
+ case am_integer:
+ if (!exact_type<BeamTypeId::Integer>(seg.src)) {
+ need_error_handler = true;
+ }
+ break;
+ case am_private_append:
+ case am_string:
+ break;
+ default:
+ need_error_handler = true;
+ break;
+ }
+
+ /*
* Attempt to calculate the effective size of this segment.
- * Give up is variable or invalid.
+ * Give up if variable or invalid.
*/
if (seg.size.isSmall() && seg.unit != 0) {
Uint unsigned_size = seg.size.as<ArgSmall>().getUnsigned();
- if ((unsigned_size >> (sizeof(Eterm) - 1) * 8) == 0) {
+ if ((unsigned_size >> (sizeof(Eterm) - 1) * 8) != 0) {
+ /* Suppress creation of heap binary. */
+ estimated_num_bits += (ERL_ONHEAP_BIN_LIMIT + 1) * 8;
+ } else {
/* This multiplication cannot overflow. */
Uint seg_size = seg.unit * unsigned_size;
seg.effectiveSize = seg_size;
num_bits += seg_size;
+ estimated_num_bits += seg_size;
+ }
+ } else if (seg.unit > 0) {
+ auto max = std::min(std::get<1>(getClampedRange(seg.size)),
+ Sint((ERL_ONHEAP_BIN_LIMIT + 1) * 8));
+ estimated_num_bits += max * seg.unit;
+ } else {
+ switch (seg.type) {
+ case am_utf8:
+ case am_utf16:
+ case am_utf32:
+ estimated_num_bits += 32;
+ break;
+ default:
+ /* Suppress creation of heap binary. */
+ estimated_num_bits += (ERL_ONHEAP_BIN_LIMIT + 1) * 8;
+ break;
}
}
@@ -1520,14 +2342,15 @@ void BeamModuleAssembler::emit_i_bs_create_bin(const ArgLabel &Fail,
/* At least one segment will need a dynamic size
* calculation. */
sizeReg = ARG8;
+ need_error_handler = true;
}
segments.insert(segments.end(), seg);
}
- if (Fail.get() != 0) {
+ if (need_error_handler && Fail.get() != 0) {
error = resolve_beam_label(Fail, dispUnknown);
- } else {
+ } else if (need_error_handler) {
Label past_error = a.newLabel();
a.b(past_error);
@@ -1550,6 +2373,8 @@ void BeamModuleAssembler::emit_i_bs_create_bin(const ArgLabel &Fail,
}
a.bind(past_error);
+ } else {
+ comment("(cannot fail)");
}
/* We count the total number of bits in an unsigned integer. To
@@ -1575,13 +2400,49 @@ void BeamModuleAssembler::emit_i_bs_create_bin(const ArgLabel &Fail,
if (seg.size.isAtom() && seg.size.as<ArgAtom>().get() == am_all &&
seg.type == am_binary) {
comment("size of an entire binary");
- mov_arg(ARG1, seg.src);
- a.mov(ARG3, ARG1);
- fragment_call(ga->get_bs_bit_size_shared());
- if (exact_type(seg.src, BEAM_TYPE_BITSTRING)) {
- comment("skipped check for success since the source "
- "is always a bit string");
+ if (exact_type<BeamTypeId::Bitstring>(seg.src)) {
+ auto src = load_source(seg.src, ARG1);
+ arm::Gp boxed_ptr = emit_ptr_val(ARG1, src.reg);
+ auto unit = getSizeUnit(seg.src);
+ bool is_bitstring = unit == 0 || std::gcd(unit, 8) != 8;
+
+ if (is_bitstring) {
+ comment("inlined size code because the value is always "
+ "a bitstring");
+ } else {
+ comment("inlined size code because the value is always "
+ "a binary");
+ }
+
+ a.ldur(TMP2, emit_boxed_val(boxed_ptr, sizeof(Eterm)));
+
+ if (is_bitstring) {
+ a.ldur(TMP1, emit_boxed_val(boxed_ptr));
+ }
+
+ a.add(sizeReg, sizeReg, TMP2, arm::lsl(3));
+
+ if (is_bitstring) {
+ Label not_sub_bin = a.newLabel();
+ const int bit_number = 3;
+ ERTS_CT_ASSERT(
+ (_TAG_HEADER_SUB_BIN & (1 << bit_number)) != 0 &&
+ (_TAG_HEADER_REFC_BIN & (1 << bit_number)) == 0 &&
+ (_TAG_HEADER_HEAP_BIN & (1 << bit_number)) == 0);
+
+ a.tbz(TMP1, imm(bit_number), not_sub_bin);
+
+ a.ldurb(TMP2.w(),
+ emit_boxed_val(boxed_ptr,
+ offsetof(ErlSubBin, bitsize)));
+ a.add(sizeReg, sizeReg, TMP2);
+
+ a.bind(not_sub_bin);
+ }
} else {
+ mov_arg(ARG1, seg.src);
+ a.mov(ARG3, ARG1);
+ fragment_call(ga->get_bs_bit_size_shared());
if (Fail.get() == 0) {
mov_imm(ARG4,
beam_jit_update_bsc_reason_info(seg.error_info,
@@ -1590,14 +2451,14 @@ void BeamModuleAssembler::emit_i_bs_create_bin(const ArgLabel &Fail,
BSC_VALUE_ARG3));
}
a.b_mi(resolve_label(error, disp1MB));
+ a.add(sizeReg, sizeReg, ARG1);
}
- a.add(sizeReg, sizeReg, ARG1);
} else if (seg.unit != 0) {
bool can_fail = true;
comment("size binary/integer/float/string");
if (always_small(seg.size)) {
- auto [min, _] = getIntRange(seg.size);
+ auto min = std::get<0>(getClampedRange(seg.size));
if (min >= 0) {
can_fail = false;
}
@@ -1615,8 +2476,7 @@ void BeamModuleAssembler::emit_i_bs_create_bin(const ArgLabel &Fail,
if (always_small(seg.size)) {
comment("skipped test for small size since it is always small");
- } else if (always_one_of(seg.size,
- BEAM_TYPE_FLOAT | BEAM_TYPE_INTEGER)) {
+ } else if (always_one_of<BeamTypeId::Number>(seg.size)) {
comment("simplified test for small size since it is a number");
emit_is_not_boxed(error, ARG3);
} else {
@@ -1627,10 +2487,10 @@ void BeamModuleAssembler::emit_i_bs_create_bin(const ArgLabel &Fail,
if (can_fail) {
a.tbnz(ARG3, 63, resolve_label(error, disp32K));
}
- a.asr(TMP1, ARG3, imm(_TAG_IMMED1_SIZE));
if (seg.unit == 1) {
- a.add(sizeReg, sizeReg, TMP1);
+ a.add(sizeReg, sizeReg, ARG3, arm::asr(_TAG_IMMED1_SIZE));
} else {
+ a.asr(TMP1, ARG3, imm(_TAG_IMMED1_SIZE));
if (Fail.get() == 0) {
mov_imm(ARG4,
beam_jit_update_bsc_reason_info(
@@ -1639,7 +2499,7 @@ void BeamModuleAssembler::emit_i_bs_create_bin(const ArgLabel &Fail,
BSC_INFO_SIZE,
BSC_VALUE_ARG3));
}
- a.tst(TMP1, imm(0xffful << 52));
+ a.tst(TMP1, imm(0xffful << (SMALL_BITS - ERL_UNIT_BITS)));
a.b_ne(resolve_label(error, disp1MB));
mov_imm(TMP2, seg.unit);
a.madd(sizeReg, TMP1, TMP2, sizeReg);
@@ -1649,24 +2509,60 @@ void BeamModuleAssembler::emit_i_bs_create_bin(const ArgLabel &Fail,
case am_utf8: {
comment("size utf8");
Label next = a.newLabel();
- auto src_reg = load_source(seg.src, TMP1);
- a.lsr(TMP1, src_reg.reg, imm(_TAG_IMMED1_SIZE));
- mov_imm(TMP2, 1 * 8);
+ mov_arg(ARG3, seg.src);
+
+ if (Fail.get() == 0) {
+ mov_imm(ARG4,
+ beam_jit_update_bsc_reason_info(seg.error_info,
+ BSC_REASON_BADARG,
+ BSC_INFO_TYPE,
+ BSC_VALUE_ARG3));
+ }
+
+ if (always_small(seg.src)) {
+ comment("skipped test for small value since it is always "
+ "small");
+ } else if (always_one_of<BeamTypeId::Integer,
+ BeamTypeId::AlwaysBoxed>(seg.src)) {
+ comment("simplified test for small operand since other "
+ "types are boxed");
+ emit_is_not_boxed(resolve_label(error, dispUnknown), ARG3);
+ } else {
+ a.and_(TMP1, ARG3, imm(_TAG_IMMED1_MASK));
+ a.cmp(TMP1, imm(_TAG_IMMED1_SMALL));
+ a.b_ne(resolve_label(error, disp1MB));
+ }
+
+ a.asr(TMP1, ARG3, imm(_TAG_IMMED1_SIZE));
+ mov_imm(TMP2, 1);
a.cmp(TMP1, imm(0x7F));
a.b_ls(next);
- mov_imm(TMP2, 2 * 8);
+ mov_imm(TMP2, 2);
a.cmp(TMP1, imm(0x7FFUL));
a.b_ls(next);
+ /* Ensure that the value is not in the invalid range
+ * 0xD800 through 0xDFFF. */
+ a.lsr(TMP3, TMP1, imm(11));
+ a.cmp(TMP3, 0x1b);
+ a.b_eq(resolve_label(error, disp1MB));
+
a.cmp(TMP1, imm(0x10000UL));
- mov_imm(TMP2, 3 * 8);
- mov_imm(TMP3, 4 * 8);
- a.csel(TMP2, TMP2, TMP3, arm::CondCode::kLO);
+ a.cset(TMP2, arm::CondCode::kHS);
+ a.add(TMP2, TMP2, imm(3));
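+            /* TMP2 is now 3 for code points below 0x10000 and 4 for all
+             * larger code points. */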
+
+ auto [min, max] = getClampedRange(seg.src);
+ if (0 <= min && max < 0x110000) {
+ comment("skipped range check for unicode code point");
+ } else {
+ a.cmp(TMP1, 0x110000);
+ a.b_hs(resolve_label(error, disp1MB));
+ }
a.bind(next);
- a.add(sizeReg, sizeReg, TMP2);
+ a.add(sizeReg, sizeReg, TMP2, arm::lsl(3));
break;
}
case am_utf16: {
@@ -1742,21 +2638,28 @@ void BeamModuleAssembler::emit_i_bs_create_bin(const ArgLabel &Fail,
a.mov(ARG1, c_p);
load_x_reg_array(ARG2);
- emit_enter_runtime<Update::eStack | Update::eHeap | Update::eXRegs |
+ emit_enter_runtime<Update::eHeapAlloc | Update::eXRegs |
Update::eReductions>(Live.get() + 1);
runtime_call<6>(erts_bs_append_checked);
- emit_leave_runtime<Update::eStack | Update::eHeap | Update::eXRegs |
+ emit_leave_runtime<Update::eHeapAlloc | Update::eXRegs |
Update::eReductions>(Live.get() + 1);
- if (Fail.get() == 0) {
- mov_arg(ARG3, ArgXRegister(Live.get()));
- mov_imm(ARG4,
- beam_jit_update_bsc_reason_info(seg.error_info,
- BSC_REASON_BADARG,
- BSC_INFO_FVALUE,
- BSC_VALUE_ARG3));
+ if (exact_type<BeamTypeId::Bitstring>(seg.src) &&
+ std::gcd(seg.unit, getSizeUnit(seg.src)) == seg.unit) {
+ /* There is no way the call can fail with a system_limit
+ * exception on a 64-bit architecture. */
+ comment("skipped test for success because units are compatible");
+ } else {
+ if (Fail.get() == 0) {
+ mov_arg(ARG3, ArgXRegister(Live.get()));
+ mov_imm(ARG4,
+ beam_jit_update_bsc_reason_info(seg.error_info,
+ BSC_REASON_BADARG,
+ BSC_INFO_FVALUE,
+ BSC_VALUE_ARG3));
+ }
+ emit_branch_if_not_value(ARG1, resolve_label(error, dispUnknown));
}
- emit_branch_if_not_value(ARG1, resolve_label(error, dispUnknown));
} else if (segments[0].type == am_private_append) {
BscSegment seg = segments[0];
comment("private append to binary");
@@ -1773,6 +2676,82 @@ void BeamModuleAssembler::emit_i_bs_create_bin(const ArgLabel &Fail,
runtime_call<4>(erts_bs_private_append_checked);
emit_leave_runtime(Live.get());
/* There is no way the call can fail on a 64-bit architecture. */
+ } else if (estimated_num_bits % 8 == 0 &&
+ estimated_num_bits / 8 <= ERL_ONHEAP_BIN_LIMIT) {
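+        /* The estimated size is a whole number of bytes and always small
+         * enough to fit in a heap binary, so the binary can be built
+         * directly on the heap without calling into the runtime system. */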
+ static constexpr auto cur_bin_offset =
+ offsetof(ErtsSchedulerRegisters, aux_regs.d.erl_bits_state) +
+ offsetof(struct erl_bits_state, erts_current_bin_);
+ Uint need;
+
+ arm::Mem mem_bin_base = arm::Mem(scheduler_registers, cur_bin_offset);
+
+ if (sizeReg.isValid()) {
+ Label after_gc_check = a.newLabel();
+
+ comment("allocate heap binary of dynamic size (=< %ld bits)",
+ estimated_num_bits);
+
+ /* Calculate number of bytes to allocate. */
+ need = (heap_bin_size(0) + Alloc.get() + S_RESERVED);
+ a.lsr(sizeReg, sizeReg, imm(3));
+ a.add(TMP3, sizeReg, imm(7));
+ a.and_(TMP3, TMP3, imm(-8));
+ a.add(TMP1, TMP3, imm(need * sizeof(Eterm)));
+
+ /* Do a GC test. */
+ a.add(ARG3, HTOP, TMP1);
+ a.cmp(ARG3, E);
+ a.b_ls(after_gc_check);
+
+ a.stp(sizeReg, TMP3, TMP_MEM1q);
+
+ mov_imm(ARG4, Live.get());
+ fragment_call(ga->get_garbage_collect());
+
+ a.ldp(sizeReg, TMP3, TMP_MEM1q);
+
+ a.bind(after_gc_check);
+
+ mov_imm(TMP1, header_heap_bin(0));
+ a.lsr(TMP4, TMP3, imm(3));
+ a.add(TMP1, TMP1, TMP4, arm::lsl(_HEADER_ARITY_OFFS));
+
+ /* Create the heap binary. */
+ a.add(ARG1, HTOP, imm(TAG_PRIMARY_BOXED));
+ a.stp(TMP1, sizeReg, arm::Mem(HTOP).post(sizeof(Eterm[2])));
+
+ /* Initialize the erl_bin_state struct. */
+ a.stp(HTOP, ZERO, mem_bin_base);
+
+ /* Update HTOP. */
+ a.add(HTOP, HTOP, TMP3);
+ } else {
+ Uint num_bytes = num_bits / 8;
+
+ comment("allocate heap binary of static size");
+
+ allocated_size = (num_bytes + 7) & (-8);
+
+ /* Ensure that there is sufficient room on the heap. */
+ need = heap_bin_size(num_bytes) + Alloc.get();
+ emit_gc_test(ArgWord(0), ArgWord(need), Live);
+
+ mov_imm(TMP1, header_heap_bin(num_bytes));
+ mov_imm(TMP2, num_bytes);
+
+ /* Create the heap binary. */
+ a.add(ARG1, HTOP, imm(TAG_PRIMARY_BOXED));
+ a.stp(TMP1, TMP2, arm::Mem(HTOP).post(sizeof(Eterm[2])));
+
+ /* Initialize the erl_bin_state struct. */
+ ERTS_CT_ASSERT_FIELD_PAIR(struct erl_bits_state,
+ erts_current_bin_,
+ erts_bin_offset_);
+ a.stp(HTOP, ZERO, mem_bin_base);
+
+ /* Update HTOP. */
+ a.add(HTOP, HTOP, imm(allocated_size));
+ }
} else {
comment("allocate binary");
mov_arg(ARG5, Alloc);
@@ -1780,30 +2759,43 @@ void BeamModuleAssembler::emit_i_bs_create_bin(const ArgLabel &Fail,
load_erl_bits_state(ARG3);
load_x_reg_array(ARG2);
a.mov(ARG1, c_p);
- emit_enter_runtime<Update::eReductions | Update::eStack |
- Update::eHeap | Update::eXRegs>(Live.get());
+ emit_enter_runtime<Update::eReductions | Update::eHeapAlloc |
+ Update::eXRegs>(Live.get());
if (sizeReg.isValid()) {
comment("(size in bits)");
a.mov(ARG4, sizeReg);
runtime_call<6>(beam_jit_bs_init_bits);
- } else if (num_bits % 8 == 0) {
- comment("(size in bytes)");
- mov_imm(ARG4, num_bits / 8);
- runtime_call<6>(beam_jit_bs_init);
} else {
+ allocated_size = (num_bits + 7) / 8;
+ if (allocated_size <= ERL_ONHEAP_BIN_LIMIT) {
+ allocated_size = (allocated_size + 7) & (-8);
+ }
mov_imm(ARG4, num_bits);
runtime_call<6>(beam_jit_bs_init_bits);
}
- emit_leave_runtime<Update::eReductions | Update::eStack |
- Update::eHeap | Update::eXRegs>(Live.get());
+ emit_leave_runtime<Update::eReductions | Update::eHeapAlloc |
+ Update::eXRegs>(Live.get());
}
a.str(ARG1, TMP_MEM1q);
+ segments = bs_combine_segments(segments);
+
+    /* Keep track of the bit offset from the beginning of the binary.
+ * Set to -1 if offset is not known (when a segment of unknown
+ * size has been seen). */
+ Sint bit_offset = 0;
+
+ /* Keep track of whether the current segment is byte-aligned. (A
+ * segment can be known to be byte-aligned even if the bit offset
+ * is unknown.) */
+ bool is_byte_aligned = true;
+
/* Build each segment of the binary. */
for (auto seg : segments) {
switch (seg.type) {
case am_append:
case am_private_append:
+ bit_offset = -1;
break;
case am_binary: {
Uint error_info;
@@ -1838,8 +2830,10 @@ void BeamModuleAssembler::emit_i_bs_create_bin(const ArgLabel &Fail,
BSC_REASON_BADARG,
BSC_INFO_UNIT,
BSC_VALUE_FVALUE);
- if (seg.unit == 1) {
- comment("skipped test for success because unit =:= 1");
+ if (exact_type<BeamTypeId::Bitstring>(seg.src) &&
+ std::gcd(seg.unit, getSizeUnit(seg.src)) == seg.unit) {
+ comment("skipped test for success because units are "
+ "compatible");
can_fail = false;
}
} else {
@@ -1847,8 +2841,8 @@ void BeamModuleAssembler::emit_i_bs_create_bin(const ArgLabel &Fail,
* the value is a non-negative small in the
* appropriate range. Multiply the size with the
* unit. */
- mov_arg(ARG3, seg.size);
- a.asr(ARG3, ARG3, imm(_TAG_IMMED1_SIZE));
+ auto r = load_source(seg.size, ARG3);
+ a.asr(ARG3, r.reg, imm(_TAG_IMMED1_SIZE));
if (seg.unit != 1) {
mov_imm(TMP1, seg.unit);
a.mul(ARG3, ARG3, TMP1);
@@ -1879,8 +2873,8 @@ void BeamModuleAssembler::emit_i_bs_create_bin(const ArgLabel &Fail,
if (seg.effectiveSize >= 0) {
mov_imm(ARG3, seg.effectiveSize);
} else {
- mov_arg(ARG3, seg.size);
- a.asr(ARG3, ARG3, imm(_TAG_IMMED1_SIZE));
+ auto r = load_source(seg.size, ARG3);
+ a.asr(ARG3, r.reg, imm(_TAG_IMMED1_SIZE));
if (seg.unit != 1) {
mov_imm(TMP1, seg.unit);
a.mul(ARG3, ARG3, TMP1);
@@ -1904,38 +2898,281 @@ void BeamModuleAssembler::emit_i_bs_create_bin(const ArgLabel &Fail,
emit_branch_if_value(ARG1, resolve_label(error, dispUnknown));
break;
case am_integer:
- comment("construct integer segment");
- if (seg.effectiveSize >= 0) {
- mov_imm(ARG3, seg.effectiveSize);
- } else {
- mov_arg(ARG3, seg.size);
- a.asr(ARG3, ARG3, imm(_TAG_IMMED1_SIZE));
- if (seg.unit != 1) {
- mov_imm(TMP1, seg.unit);
- a.mul(ARG3, ARG3, TMP1);
+ switch (seg.action) {
+ case BscSegment::action::ACCUMULATE_FIRST:
+ case BscSegment::action::ACCUMULATE: {
+ /* Shift an integer of known size (no more than 64 bits)
+ * into a word-size accumulator. */
+ Label value_is_small = a.newLabel();
+ Label done = a.newLabel();
+
+ comment("accumulate value for integer segment");
+ auto src = load_source(seg.src, ARG1);
+ if (seg.effectiveSize < 64 &&
+ seg.action == BscSegment::action::ACCUMULATE) {
+ a.lsl(ARG8, ARG8, imm(seg.effectiveSize));
+ }
+
+ if (!always_small(seg.src)) {
+ if (always_one_of<BeamTypeId::Integer,
+ BeamTypeId::AlwaysBoxed>(seg.src)) {
+ comment("simplified small test since all other types "
+ "are boxed");
+ emit_is_boxed(value_is_small, seg.src, src.reg);
+ } else {
+ a.and_(TMP1, src.reg, imm(_TAG_IMMED1_MASK));
+ a.cmp(TMP1, imm(_TAG_IMMED1_SMALL));
+ a.b_eq(value_is_small);
+ }
+
+ /* The value is boxed. If it is a bignum, extract the
+ * least significant 64 bits. */
+ mov_var(ARG1, src);
+ fragment_call(ga->get_get_sint64_shared());
+ if (seg.effectiveSize == 64) {
+ a.mov(ARG8, ARG1);
+ } else {
+ a.bfxil(ARG8,
+ ARG1,
+ arm::lsr(0),
+ imm(seg.effectiveSize));
+ }
+
+ if (exact_type<BeamTypeId::Integer>(seg.src)) {
+ a.b(done);
+ } else {
+ a.b_ne(done);
+
+ /* Not a bignum. Signal error. */
+ if (Fail.get() == 0) {
+ mov_imm(ARG4,
+ beam_jit_update_bsc_reason_info(
+ seg.error_info,
+ BSC_REASON_BADARG,
+ BSC_INFO_TYPE,
+ BSC_VALUE_ARG1));
+ }
+ a.b(resolve_label(error, disp128MB));
+ }
+ }
+
+ a.bind(value_is_small);
+ if (seg.effectiveSize == 64) {
+ a.asr(ARG8, src.reg, imm(_TAG_IMMED1_SIZE));
+ } else if (seg.effectiveSize + _TAG_IMMED1_SIZE > 64) {
+ a.asr(TMP1, src.reg, imm(_TAG_IMMED1_SIZE));
+ a.bfxil(ARG8, TMP1, arm::lsr(0), imm(seg.effectiveSize));
+ } else {
+ a.bfxil(ARG8,
+ src.reg,
+ arm::lsr(_TAG_IMMED1_SIZE),
+ imm(seg.effectiveSize));
}
+
+ a.bind(done);
+ break;
}
- mov_arg(ARG2, seg.src);
- mov_imm(ARG4, seg.flags);
- load_erl_bits_state(ARG1);
+ case BscSegment::action::STORE: {
+ /* The accumulator is now full or the next segment is
+ * not possible to accumulate, so it's time to store
+ * the accumulator to the current position in the
+ * binary. */
+ Label store = a.newLabel();
+ Label done = a.newLabel();
+
+ comment("construct integer segment from accumulator");
+
+ /* First we'll need to ensure that the value in the
+ * accumulator is in little endian format. */
+ ASSERT(seg.effectiveSize >= 0);
+ if (seg.effectiveSize % 8) {
+ Uint complete_bytes = 8 * (seg.effectiveSize / 8);
+ Uint num_partial = seg.effectiveSize % 8;
+ if (seg.flags & BSF_LITTLE) {
+ a.ubfx(TMP1,
+ ARG8,
+ imm(complete_bytes),
+ imm(num_partial));
+ a.bfc(ARG8,
+ arm::lsr(complete_bytes),
+ imm(64 - complete_bytes));
+ a.bfi(ARG8,
+ TMP1,
+ imm(complete_bytes + 8 - num_partial),
+ imm(num_partial));
+ } else {
+ a.lsl(ARG8, ARG8, imm(64 - seg.effectiveSize));
+ a.rev64(ARG8, ARG8);
+ }
+ } else if ((seg.flags & BSF_LITTLE) == 0) {
+ switch (seg.effectiveSize) {
+ case 8:
+ break;
+ case 16:
+ a.rev16(ARG8, ARG8);
+ break;
+ case 32:
+ a.rev32(ARG8, ARG8);
+ break;
+ case 64:
+ a.rev64(ARG8, ARG8);
+ break;
+ default:
+ a.rev64(ARG8, ARG8);
+ a.lsr(ARG8, ARG8, imm(64 - seg.effectiveSize));
+ }
+ }
- emit_enter_runtime(Live.get());
- runtime_call<4>(erts_new_bs_put_integer);
- emit_leave_runtime(Live.get());
+ arm::Gp bin_offset = ARG3;
+ arm::Gp bin_data = ARG8;
+
+ update_bin_state(bin_offset,
+ bit_offset,
+ seg.effectiveSize,
+ arm::Gp());
+
+ if (!is_byte_aligned) {
+ if (bit_offset < 0) {
+ /* Bit offset is unknown. Must test alignment. */
+ a.ands(bin_offset, bin_offset, imm(7));
+ a.b_eq(store);
+ } else if (bit_offset >= 0) {
+                        /* The offset is known to not be byte-aligned. */
+ mov_imm(bin_offset, bit_offset & 7);
+ }
+
+ /* Bit offset is tested or known to be unaligned. */
+ mov_imm(ARG4, seg.effectiveSize);
+ fragment_call(ga->get_store_unaligned());
+
+ if (bit_offset < 0) {
+                        /* The bit offset is unknown, so byte-aligned
+                         * store code follows that we need to branch
+                         * past. */
+ a.b(done);
+ }
+ }
- if (exact_type(seg.src, BEAM_TYPE_INTEGER)) {
- comment("skipped test for success because construction can't "
- "fail");
- } else {
- if (Fail.get() == 0) {
- mov_arg(ARG3, seg.src);
- mov_imm(ARG4,
- beam_jit_update_bsc_reason_info(seg.error_info,
- BSC_REASON_BADARG,
- BSC_INFO_TYPE,
- BSC_VALUE_ARG3));
+ a.bind(store);
+
+ if (bit_offset < 0 || is_byte_aligned) {
+ /* Bit offset is tested or known to be
+ * byte-aligned. Emit inline code to store the
+ * value of the accumulator into the binary. */
+ int num_bytes = (seg.effectiveSize + 7) / 8;
+
+ /* If more than one instruction is required for
+ * doing the store, test whether it would be safe
+ * to do a single 32 or 64 bit store. */
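+                    /* For example, a 3-byte store would normally need a
+                     * strh followed by a strb, but when the bit offset is
+                     * known and at least 32 bits remain in the allocated
+                     * binary, a single 32-bit store is safe: the extra
+                     * byte stays within the allocation and is either
+                     * overwritten by later segments or ignored. */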
+ switch (num_bytes) {
+ case 3:
+ if (bit_offset >= 0 &&
+ allocated_size * 8 - bit_offset >= 32) {
+ comment("simplified complicated store");
+ num_bytes = 4;
+ }
+ break;
+ case 5:
+ case 6:
+ case 7:
+ if (bit_offset >= 0 &&
+ allocated_size * 8 - bit_offset >= 64) {
+ comment("simplified complicated store");
+ num_bytes = 8;
+ }
+ break;
+ }
+
+ do {
+ switch (num_bytes) {
+ case 1:
+ a.strb(bin_data.w(), arm::Mem(TMP1));
+ break;
+ case 2:
+ a.strh(bin_data.w(), arm::Mem(TMP1));
+ break;
+ case 3:
+ a.strh(bin_data.w(), arm::Mem(TMP1));
+ a.lsr(bin_data, bin_data, imm(16));
+ a.strb(bin_data.w(), arm::Mem(TMP1, 2));
+ break;
+ case 4:
+ a.str(bin_data.w(), arm::Mem(TMP1));
+ break;
+ case 5:
+ case 6:
+ case 7:
+ a.str(bin_data.w(), arm::Mem(TMP1).post(4));
+ a.lsr(bin_data, bin_data, imm(32));
+ break;
+ case 8:
+ a.str(bin_data, arm::Mem(TMP1));
+ num_bytes = 0;
+ break;
+ }
+ num_bytes -= 4;
+ } while (num_bytes > 0);
+ }
+
+ a.bind(done);
+ break;
+ }
+ case BscSegment::action::DIRECT:
+ /* This segment either has a size exceeding the maximum
+ * accumulator size of 64 bits or has a variable size.
+ *
+ * First load the effective size (size * unit) into ARG3.
+ */
+ comment("construct integer segment");
+ if (seg.effectiveSize >= 0) {
+ mov_imm(ARG3, seg.effectiveSize);
+ } else {
+ auto size = load_source(seg.size, TMP1);
+ a.lsr(ARG3, size.reg, imm(_TAG_IMMED1_SIZE));
+ if (Support::isPowerOf2(seg.unit)) {
+ Uint trailing_bits = Support::ctz<Eterm>(seg.unit);
+ if (trailing_bits) {
+ a.lsl(ARG3, ARG3, imm(trailing_bits));
+ }
+ } else {
+ mov_imm(TMP1, seg.unit);
+ a.mul(ARG3, ARG3, TMP1);
+ }
+ }
+
+ if (is_byte_aligned && seg.src.isSmall() &&
+ seg.src.as<ArgSmall>().getSigned() == 0) {
+ /* Optimize the special case of setting a known
+ * byte-aligned segment to zero. */
+ comment("optimized setting segment to 0");
+ set_zero(seg.effectiveSize);
+ } else {
+ /* Call the helper function to fetch and store the
+ * integer into the binary. */
+ mov_arg(ARG2, seg.src);
+ mov_imm(ARG4, seg.flags);
+ load_erl_bits_state(ARG1);
+
+ emit_enter_runtime(Live.get());
+ runtime_call<4>(erts_new_bs_put_integer);
+ emit_leave_runtime(Live.get());
+
+ if (exact_type<BeamTypeId::Integer>(seg.src)) {
+ comment("skipped test for success because construction "
+ "can't fail");
+ } else {
+ if (Fail.get() == 0) {
+ mov_arg(ARG3, seg.src);
+ mov_imm(ARG4,
+ beam_jit_update_bsc_reason_info(
+ seg.error_info,
+ BSC_REASON_BADARG,
+ BSC_INFO_TYPE,
+ BSC_VALUE_ARG3));
+ }
+ a.cbz(ARG1, resolve_label(error, disp1MB));
+ }
}
- a.cbz(ARG1, resolve_label(error, disp1MB));
}
break;
case am_string: {
@@ -1953,27 +3190,12 @@ void BeamModuleAssembler::emit_i_bs_create_bin(const ArgLabel &Fail,
emit_leave_runtime(Live.get());
break;
}
- case am_utf8:
- comment("construct utf8 segment");
- mov_arg(ARG2, seg.src);
- load_erl_bits_state(ARG1);
-
- emit_enter_runtime(Live.get());
- runtime_call<2>(erts_bs_put_utf8);
-
- emit_leave_runtime(Live.get());
- if (Fail.get() == 0) {
- mov_arg(ARG3, seg.src);
- mov_imm(ARG4,
- beam_jit_update_bsc_reason_info(seg.error_info,
- BSC_REASON_BADARG,
- BSC_INFO_TYPE,
- BSC_VALUE_ARG3));
- }
- a.cbz(ARG1, resolve_label(error, disp1MB));
+ case am_utf8: {
+ emit_construct_utf8(seg.src, bit_offset, is_byte_aligned);
break;
+ }
case am_utf16:
- comment("construct utf8 segment");
+ comment("construct utf16 segment");
mov_arg(ARG2, seg.src);
a.mov(ARG3, seg.flags);
load_erl_bits_state(ARG1);
@@ -2016,8 +3238,925 @@ void BeamModuleAssembler::emit_i_bs_create_bin(const ArgLabel &Fail,
ASSERT(0);
break;
}
+
+ /* Try to keep track of the bit offset. */
+ if (bit_offset >= 0 && (seg.action == BscSegment::action::DIRECT ||
+ seg.action == BscSegment::action::STORE)) {
+ if (seg.effectiveSize >= 0) {
+ bit_offset += seg.effectiveSize;
+ } else {
+ bit_offset = -1;
+ }
+ }
+
+        /* Try to keep track of whether the next segment is byte
+         * aligned. */
+ if (seg.type == am_append || seg.type == am_private_append) {
+ if (!exact_type<BeamTypeId::Bitstring>(seg.src) ||
+ std::gcd(getSizeUnit(seg.src), 8) != 8) {
+ is_byte_aligned = false;
+ }
+ } else if (bit_offset % 8 == 0) {
+ is_byte_aligned = true;
+ } else if (seg.effectiveSize >= 0) {
+ if (seg.effectiveSize % 8 != 0) {
+ is_byte_aligned = false;
+ }
+ } else if (std::gcd(seg.unit, 8) != 8) {
+ is_byte_aligned = false;
+ }
}
comment("done");
mov_arg(Dst, TMP_MEM1q);
}
+
+/*
+ * Here follows the bs_match instruction and friends.
+ */
+
+struct BsmSegment {
+ BsmSegment()
+ : action(action::TEST_HEAP), live(ArgNil()), size(0), unit(1),
+ flags(0), dst(ArgXRegister(0)){};
+
+ enum class action {
+ TEST_HEAP,
+ ENSURE_AT_LEAST,
+ ENSURE_EXACTLY,
+ READ,
+ EXTRACT_BINARY,
+ EXTRACT_INTEGER,
+ GET_INTEGER,
+ GET_BINARY,
+ SKIP,
+ DROP,
+ GET_TAIL,
+ EQ
+ } action;
+ ArgVal live;
+ Uint size;
+ Uint unit;
+ Uint flags;
+ ArgRegister dst;
+};
+
+void BeamModuleAssembler::emit_read_bits(Uint bits,
+ const arm::Gp bin_base,
+ const arm::Gp bin_offset,
+ const arm::Gp bitdata) {
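+    /* Read `bits` (1-64) bits starting at bit offset `bin_offset` into
+     * `bitdata`, given that the binary's data starts at `bin_base`. Each
+     * size class below loads the bytes that may contain the segment,
+     * byte-swaps them into big-endian order, and shifts the result so
+     * that the first bit of the segment ends up in the most significant
+     * bit of `bitdata`; callers then extract fields from the top of
+     * `bitdata`. */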
+ Label handle_partial = a.newLabel();
+ Label rev64 = a.newLabel();
+ Label shift = a.newLabel();
+ Label read_done = a.newLabel();
+
+ bool need_rev64 = false;
+
+ const arm::Gp bin_byte_ptr = TMP2;
+ const arm::Gp bit_offset = TMP4;
+ const arm::Gp tmp = TMP5;
+
+ auto num_partial = bits % 8;
+
+ ASSERT(1 <= bits && bits <= 64);
+
+ a.add(bin_byte_ptr, bin_base, bin_offset, arm::lsr(3));
+
+ if (bits <= 8) {
+ a.ands(bit_offset, bin_offset, imm(7));
+
+ if (num_partial == 0) {
+ /* Byte-sized segment. If bit_offset is not byte-aligned,
+ * this segment always spans two bytes. */
+ a.b_ne(handle_partial);
+ } else if (num_partial > 1) {
+ /* The segment is smaller than one byte but more than one
+ * bit. Test whether it fits within the current byte. */
+ a.cmp(bit_offset, imm(8 - num_partial));
+ a.b_gt(handle_partial);
+ }
+
+ /* The segment fits in the current byte. */
+ a.ldrb(bitdata.w(), arm::Mem(bin_byte_ptr));
+ if (num_partial == 0) {
+ a.rev64(bitdata, bitdata);
+ a.b(read_done);
+ } else if (num_partial > 1) {
+ a.b(rev64);
+ }
+
+ /* The segment is unaligned and spans two bytes. */
+ a.bind(handle_partial);
+ if (num_partial != 1) {
+ a.ldrh(bitdata.w(), arm::Mem(bin_byte_ptr));
+ }
+ need_rev64 = true;
+ } else if (bits <= 16) {
+ a.ands(bit_offset, bin_offset, imm(7));
+
+ /* We always need to read at least two bytes. */
+ a.ldrh(bitdata.w(), arm::Mem(bin_byte_ptr));
+ a.rev64(bitdata, bitdata);
+ a.b_eq(read_done); /* Done if segment is byte-aligned. */
+
+ /* The segment is unaligned. If its size is 9, it always fits
+ * in two bytes and we fall through to the shift instruction. */
+ a.bind(handle_partial);
+ if (num_partial > 1) {
+            /* If the segment size is 15 bits or less, it is
+             * possible that it fits into two bytes. */
+ a.cmp(bit_offset, imm(8 - num_partial));
+ a.b_le(shift);
+ }
+
+ if (num_partial != 1) {
+ /* The segment spans three bytes. Read an additional byte and
+             * shift it into place (right below the already read two bytes
+             * at the top of the word). */
+ a.ldrb(tmp.w(), arm::Mem(bin_byte_ptr, 2));
+ a.orr(bitdata, bitdata, tmp, arm::lsl(40));
+ }
+ } else if (bits <= 24) {
+ a.ands(bit_offset, bin_offset, imm(7));
+
+ if (num_partial == 0) {
+ /* Byte-sized segment. If bit_offset is not byte-aligned,
+ * this segment always spans four bytes. */
+ a.b_ne(handle_partial);
+ } else if (num_partial > 1) {
+ /* The segment is smaller than three bytes. Test whether
+ * it spans three or four bytes. */
+ a.cmp(bit_offset, imm(8 - num_partial));
+ a.b_gt(handle_partial);
+ }
+
+ /* This segment spans three bytes. */
+ a.ldrh(bitdata.w(), arm::Mem(bin_byte_ptr));
+ a.ldrb(tmp.w(), arm::Mem(bin_byte_ptr, 2));
+ a.orr(bitdata, bitdata, tmp, arm::lsl(16));
+ if (num_partial == 0) {
+ a.rev64(bitdata, bitdata);
+ a.b(read_done);
+ } else if (num_partial > 1) {
+ a.b(rev64);
+ }
+
+ /* This segment spans four bytes. */
+ a.bind(handle_partial);
+ if (num_partial != 1) {
+ a.ldr(bitdata.w(), arm::Mem(bin_byte_ptr));
+ }
+ need_rev64 = true;
+ } else if (bits <= 32) {
+ a.ands(bit_offset, bin_offset, imm(7));
+
+ /* We always need to read at least four bytes. */
+ a.ldr(bitdata.w(), arm::Mem(bin_byte_ptr));
+ a.rev64(bitdata, bitdata);
+ a.b_eq(read_done);
+
+ a.bind(handle_partial);
+ if (num_partial > 0) {
+ a.cmp(bit_offset, imm(8 - num_partial));
+ a.b_le(shift);
+ }
+
+ if (num_partial != 1) {
+ /* The segment spans five bytes. Read an additional byte and
+ * shift into place. */
+ a.ldrb(tmp.w(), arm::Mem(bin_byte_ptr, 4));
+ a.orr(bitdata, bitdata, tmp, arm::lsl(24));
+ }
+ } else if (bits <= 40) {
+ a.ands(bit_offset, bin_offset, imm(7));
+
+ /* We always need to read four bytes. */
+ a.ldr(bitdata.w(), arm::Mem(bin_byte_ptr));
+ a.rev64(bitdata, bitdata);
+
+ if (num_partial == 0) {
+ /* Byte-sized segment. If bit_offset is not byte-aligned,
+ * this segment always spans six bytes. */
+ a.b_ne(handle_partial);
+ } else if (num_partial > 1) {
+ /* The segment is smaller than five bytes. Test whether it
+ * spans five or six bytes. */
+ a.cmp(bit_offset, imm(8 - num_partial));
+ a.b_gt(handle_partial);
+ }
+
+ /* This segment spans five bytes. Read an additional byte. */
+ a.ldrb(tmp.w(), arm::Mem(bin_byte_ptr, 4));
+ a.orr(bitdata, bitdata, tmp, arm::lsl(24));
+ if (num_partial == 0) {
+ a.b(read_done);
+ } else if (num_partial > 1) {
+ a.b(shift);
+ }
+
+ a.bind(handle_partial);
+ if (num_partial != 1) {
+ /* This segment spans six bytes. Read two additional bytes. */
+ a.ldrh(tmp.w(), arm::Mem(bin_byte_ptr, 4));
+ a.rev16(tmp.w(), tmp.w());
+ a.orr(bitdata, bitdata, tmp, arm::lsl(16));
+ }
+ } else if (bits <= 48) {
+ a.ands(bit_offset, bin_offset, imm(7));
+ a.ldr(bitdata.w(), arm::Mem(bin_byte_ptr));
+ a.ldrh(tmp.w(), arm::Mem(bin_byte_ptr, 4));
+ a.orr(bitdata, bitdata, tmp, arm::lsl(32));
+ a.rev64(bitdata, bitdata);
+ a.b_eq(read_done);
+
+ a.bind(handle_partial);
+ if (num_partial > 1) {
+ a.cmp(bit_offset, imm(8 - num_partial));
+ a.b_le(shift);
+ }
+
+ if (num_partial != 1) {
+ a.ldrb(tmp.w(), arm::Mem(bin_byte_ptr, 6));
+ a.orr(bitdata, bitdata, tmp, arm::lsl(8));
+ }
+ } else if (bits <= 56) {
+ a.ands(bit_offset, bin_offset, imm(7));
+
+ if (num_partial == 0) {
+ /* Byte-sized segment. If bit_offset is not byte-aligned,
+ * this segment always spans 8 bytes. */
+ a.b_ne(handle_partial);
+ } else if (num_partial > 1) {
+            /* The segment is smaller than 7 bytes. Test whether it
+             * spans 7 or 8 bytes. */
+ a.cmp(bit_offset, imm(8 - num_partial));
+ a.b_gt(handle_partial);
+ }
+
+ /* This segment spans 7 bytes. */
+ a.ldr(bitdata, arm::Mem(bin_byte_ptr, -1));
+ a.lsr(bitdata, bitdata, imm(8));
+ a.b(rev64);
+
+ /* This segment spans 8 bytes. */
+ a.bind(handle_partial);
+ if (num_partial != 1) {
+ a.ldr(bitdata, arm::Mem(bin_byte_ptr));
+ }
+ need_rev64 = true;
+ } else if (bits <= 64) {
+ a.ands(bit_offset, bin_offset, imm(7));
+ a.ldr(bitdata, arm::Mem(bin_byte_ptr));
+ a.rev64(bitdata, bitdata);
+
+ if (num_partial == 0) {
+ /* Byte-sized segment. If it is aligned it spans 8 bytes
+ * and we are done. */
+ a.b_eq(read_done);
+ } else if (num_partial == 1) {
+ /* This segment is 57 bits wide. It always spans 8 bytes. */
+ a.b(shift);
+ } else {
+ /* The segment is smaller than 8 bytes. Test whether it
+ * spans 8 or 9 bytes. */
+ a.cmp(bit_offset, imm(8 - num_partial));
+ a.b_le(shift);
+ }
+
+        /* This segment spans 9 bytes. Read an additional byte. */
+ a.bind(handle_partial);
+ if (num_partial != 1) {
+ a.ldrb(tmp.w(), arm::Mem(bin_byte_ptr, 8));
+ a.lsl(bitdata, bitdata, bit_offset);
+ a.lsl(tmp, tmp, bit_offset);
+ a.orr(bitdata, bitdata, tmp, arm::lsr(8));
+ a.b(read_done);
+ }
+ }
+
+ a.bind(rev64);
+ if (need_rev64) {
+ a.rev64(bitdata, bitdata);
+ }
+
+ /* Shift the read data into the most significant bits of the
+ * word. */
+ a.bind(shift);
+ a.lsl(bitdata, bitdata, bit_offset);
+
+ a.bind(read_done);
+}
+
+void BeamModuleAssembler::emit_extract_integer(const arm::Gp bitdata,
+ Uint flags,
+ Uint bits,
+ const ArgRegister &Dst) {
+ Label big = a.newLabel();
+ Label done = a.newLabel();
+ arm::Gp data_reg;
+ auto dst = init_destination(Dst, TMP1);
+ Uint num_partial = bits % 8;
+ Uint num_complete = 8 * (bits / 8);
+
+ if (bits <= 8) {
+ /* Endian does not matter for values that fit in a byte. */
+ flags &= ~BSF_LITTLE;
+ }
+
+ /* If this segment is little-endian, reverse endianness. */
+ if ((flags & BSF_LITTLE) != 0) {
+ comment("reverse endian for a little-endian segment");
+ }
+ data_reg = TMP2;
+ if ((flags & BSF_LITTLE) == 0) {
+ data_reg = bitdata;
+ } else if (bits == 16) {
+ a.rev16(TMP2, bitdata);
+ } else if (bits == 32) {
+ a.rev32(TMP2, bitdata);
+ } else if (num_partial == 0) {
+ a.rev64(TMP2, bitdata);
+ a.lsr(TMP2, TMP2, arm::lsr(64 - bits));
+ } else {
+ a.ubfiz(TMP3, bitdata, imm(num_complete), imm(num_partial));
+ a.ubfx(TMP2, bitdata, imm(num_partial), imm(num_complete));
+ a.rev64(TMP2, TMP2);
+ a.orr(TMP2, TMP3, TMP2, arm::lsr(64 - num_complete));
+ }
+
+ /* Sign-extend the number if the segment is signed. */
+ if ((flags & BSF_SIGNED) != 0) {
+ if (0 < bits && bits < 64) {
+ comment("sign extend extracted value");
+ a.lsl(TMP2, data_reg, imm(64 - bits));
+ a.asr(TMP2, TMP2, imm(64 - bits));
+ data_reg = TMP2;
+ }
+ }
+
+ /* Handle segments whose values might not fit in a small integer. */
+ if (bits >= SMALL_BITS) {
+ comment("test whether it fits in a small");
+ if (bits < 64 && (flags & BSF_SIGNED) == 0) {
+ a.and_(TMP2, data_reg, imm((1ull << bits) - 1));
+ data_reg = TMP2;
+ }
+ if ((flags & BSF_SIGNED) != 0) {
+ /* Signed segment. */
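+            /* A signed value fits in a small if bits SMALL_BITS-1
+             * through 63 are either all zeroes or all ones, i.e. if
+             * data_reg >> (SMALL_BITS - 1) is 0 or the all-ones pattern
+             * _TAG_IMMED1_MASK << 1 | 1. The adds/ccmp pair below tests
+             * both cases and branches to the bignum path when neither
+             * holds. */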
+ a.adds(TMP3, ZERO, data_reg, arm::lsr(SMALL_BITS - 1));
+ a.ccmp(TMP3,
+ imm(_TAG_IMMED1_MASK << 1 | 1),
+ imm(NZCV::kEqual),
+ imm(arm::CondCode::kNE));
+ a.b_ne(big);
+ } else {
+ /* Unsigned segment. */
+ a.lsr(TMP3, data_reg, imm(SMALL_BITS - 1));
+ a.cbnz(TMP3, big);
+ }
+ }
+
+ /* Tag and store the extracted small integer. */
+ comment("store extracted integer as a small");
+ mov_imm(dst.reg, _TAG_IMMED1_SMALL);
+ if ((flags & BSF_SIGNED) != 0) {
+ a.orr(dst.reg, dst.reg, data_reg, arm::lsl(_TAG_IMMED1_SIZE));
+ } else {
+ if (bits >= SMALL_BITS) {
+ a.bfi(dst.reg,
+ data_reg,
+ arm::lsl(_TAG_IMMED1_SIZE),
+ imm(SMALL_BITS));
+ } else if (bits != 0) {
+ a.bfi(dst.reg, data_reg, arm::lsl(_TAG_IMMED1_SIZE), imm(bits));
+ }
+ }
+
+ if (bits >= SMALL_BITS) {
+ a.b(done);
+ }
+
+ /* Handle a bignum (up to 64 bits). */
+ a.bind(big);
+ if (bits >= SMALL_BITS) {
+ comment("store extracted integer as a bignum");
+ a.add(dst.reg, HTOP, imm(TAG_PRIMARY_BOXED));
+ mov_imm(TMP3, make_pos_bignum_header(1));
+ if ((flags & BSF_SIGNED) == 0) {
+ /* Unsigned. */
+ a.stp(TMP3, data_reg, arm::Mem(HTOP).post(sizeof(Eterm[2])));
+ } else {
+ /* Signed. */
+ Label store = a.newLabel();
+ a.adds(TMP2, data_reg, ZERO);
+ a.b_pl(store);
+
+ mov_imm(TMP3, make_neg_bignum_header(1));
+ a.neg(TMP2, TMP2);
+
+ a.bind(store);
+ a.stp(TMP3, TMP2, arm::Mem(HTOP).post(sizeof(Eterm[2])));
+ }
+ }
+
+ a.bind(done);
+ flush_var(dst);
+}
+
+void BeamModuleAssembler::emit_extract_binary(const arm::Gp bitdata,
+ Uint bits,
+ const ArgRegister &Dst) {
+ auto dst = init_destination(Dst, TMP1);
+ Uint num_bytes = bits / 8;
+
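+    /* The extracted bits occupy the most significant bytes of `bitdata`
+     * in big-endian order (see emit_read_bits), so rev64 restores memory
+     * byte order before the data word is stored into the heap binary.
+     * Bytes beyond num_bytes are ignored because the heap binary's size
+     * field is num_bytes. */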
+ a.add(dst.reg, HTOP, imm(TAG_PRIMARY_BOXED));
+ mov_imm(TMP2, header_heap_bin(num_bytes));
+ mov_imm(TMP3, num_bytes);
+ a.rev64(TMP4, bitdata);
+ a.stp(TMP2, TMP3, arm::Mem(HTOP).post(sizeof(Eterm[2])));
+ if (num_bytes != 0) {
+ a.str(TMP4, arm::Mem(HTOP).post(sizeof(Eterm[1])));
+ }
+ flush_var(dst);
+}
+
+static std::vector<BsmSegment> opt_bsm_segments(
+ const std::vector<BsmSegment> segments,
+ const ArgWord &Need,
+ const ArgWord &Live) {
+ std::vector<BsmSegment> segs;
+
+ Uint heap_need = Need.get();
+
+ /*
+ * First calculate the total number of heap words needed for
+ * bignums and binaries.
+ */
+ for (auto seg : segments) {
+ switch (seg.action) {
+ case BsmSegment::action::GET_INTEGER:
+ if (seg.size >= SMALL_BITS) {
+ heap_need += BIG_NEED_FOR_BITS(seg.size);
+ }
+ break;
+ case BsmSegment::action::GET_BINARY:
+ heap_need += heap_bin_size((seg.size + 7) / 8);
+ break;
+ case BsmSegment::action::GET_TAIL:
+ heap_need += EXTRACT_SUB_BIN_HEAP_NEED;
+ break;
+ default:
+ break;
+ }
+ }
+
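+    /* Merge matching of adjacent small segments into shared READ
+     * actions. A READ loads up to 64 bits into a register; the segments
+     * that follow it become EXTRACT_INTEGER, EXTRACT_BINARY, DROP, or
+     * coalesced EQ actions that pick their bits out of that register
+     * without touching memory again. For example, the matching of
+     * <<A:8, B:16, _:8, C:32>> can become a single 64-bit READ followed
+     * by extract and drop actions. */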
+ int index = 0;
+ int read_action_pos = -1;
+
+ for (auto seg : segments) {
+ if (heap_need != 0 && seg.live.isWord()) {
+ BsmSegment s = seg;
+
+ read_action_pos = -1;
+ s.action = BsmSegment::action::TEST_HEAP;
+ s.size = heap_need;
+ segs.push_back(s);
+ index++;
+ heap_need = 0;
+ }
+
+ switch (seg.action) {
+ case BsmSegment::action::GET_INTEGER:
+ case BsmSegment::action::GET_BINARY:
+ if (seg.size > 64) {
+ read_action_pos = -1;
+ } else if (seg.action == BsmSegment::action::GET_BINARY &&
+ seg.size % 8 != 0) {
+ read_action_pos = -1;
+ } else {
+ if ((seg.flags & BSF_LITTLE) != 0 || read_action_pos < 0 ||
+ seg.size + segs.at(read_action_pos).size > 64) {
+ BsmSegment s;
+
+ /* Create a new READ action. */
+ read_action_pos = index;
+ s.action = BsmSegment::action::READ;
+ s.size = seg.size;
+ segs.push_back(s);
+ index++;
+ } else {
+ /* Reuse previous READ action. */
+ segs.at(read_action_pos).size += seg.size;
+ }
+ switch (seg.action) {
+ case BsmSegment::action::GET_INTEGER:
+ seg.action = BsmSegment::action::EXTRACT_INTEGER;
+ break;
+ case BsmSegment::action::GET_BINARY:
+ seg.action = BsmSegment::action::EXTRACT_BINARY;
+ break;
+ default:
+ break;
+ }
+ }
+ segs.push_back(seg);
+ break;
+ case BsmSegment::action::EQ: {
+ if (read_action_pos < 0 ||
+ seg.size + segs.at(read_action_pos).size > 64) {
+ BsmSegment s;
+
+ /* Create a new READ action. */
+ read_action_pos = index;
+ s.action = BsmSegment::action::READ;
+ s.size = seg.size;
+ segs.push_back(s);
+ index++;
+ } else {
+ /* Reuse previous READ action. */
+ segs.at(read_action_pos).size += seg.size;
+ }
+ auto &prev = segs.back();
+ if (prev.action == BsmSegment::action::EQ &&
+ prev.size + seg.size <= 64) {
+ /* Coalesce with the previous EQ instruction. */
+ prev.size += seg.size;
+ prev.unit = prev.unit << seg.size | seg.unit;
+ index--;
+ } else {
+ segs.push_back(seg);
+ }
+ break;
+ }
+ case BsmSegment::action::SKIP:
+ if (read_action_pos >= 0 &&
+ seg.size + segs.at(read_action_pos).size <= 64) {
+ segs.at(read_action_pos).size += seg.size;
+ seg.action = BsmSegment::action::DROP;
+ } else {
+ read_action_pos = -1;
+ }
+ segs.push_back(seg);
+ break;
+ default:
+ read_action_pos = -1;
+ segs.push_back(seg);
+ break;
+ }
+ index++;
+ }
+
+ /* Handle a trailing test_heap instruction (for the
+ * i_bs_match_test_heap instruction). */
+ if (heap_need) {
+ BsmSegment seg;
+
+ seg.action = BsmSegment::action::TEST_HEAP;
+ seg.size = heap_need;
+ seg.live = Live;
+ segs.push_back(seg);
+ }
+ return segs;
+}
+
+UWord BeamModuleAssembler::bs_get_flags(const ArgVal &val) {
+ if (val.isNil()) {
+ return 0;
+ } else if (val.isLiteral()) {
+ Eterm term = beamfile_get_literal(beam, val.as<ArgLiteral>().get());
+ UWord flags = 0;
+
+ while (is_list(term)) {
+ Eterm *consp = list_val(term);
+ Eterm elem = CAR(consp);
+ switch (elem) {
+ case am_little:
+ case am_native:
+ flags |= BSF_LITTLE;
+ break;
+ case am_signed:
+ flags |= BSF_SIGNED;
+ break;
+ }
+ term = CDR(consp);
+ }
+ ASSERT(is_nil(term));
+ return flags;
+ } else if (val.isWord()) {
+ /* Originates from bs_get_integer2 instruction. */
+ return val.as<ArgWord>().get();
+ } else {
+ ASSERT(0); /* Should not happen. */
+ return 0;
+ }
+}
+
+void BeamModuleAssembler::emit_i_bs_match(ArgLabel const &Fail,
+ ArgRegister const &Ctx,
+ Span<ArgVal> const &List) {
+ emit_i_bs_match_test_heap(Fail, Ctx, ArgWord(0), ArgWord(0), List);
+}
+
+void BeamModuleAssembler::emit_i_bs_match_test_heap(ArgLabel const &Fail,
+ ArgRegister const &Ctx,
+ ArgWord const &Need,
+ ArgWord const &Live,
+ Span<ArgVal> const &List) {
+ const int orig_offset = offsetof(ErlBinMatchState, mb.orig);
+ const int base_offset = offsetof(ErlBinMatchState, mb.base);
+ const int position_offset = offsetof(ErlBinMatchState, mb.offset);
+ const int size_offset = offsetof(ErlBinMatchState, mb.size);
+
+ std::vector<BsmSegment> segments;
+
+ auto current = List.begin();
+ auto end = List.begin() + List.size();
+
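+    /* The List argument is a flat sequence of commands, each followed by
+     * its arguments as parsed below: ensure_at_least Size Unit;
+     * ensure_exactly Size; integer/binary Live Flags Size Unit Dst;
+     * get_tail Live _ Dst (the middle argument is ignored here);
+     * skip Size; '=:=' Live Size Value, where the literal value to
+     * compare is stored in the segment's unit field. */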
+ while (current < end) {
+ auto cmd = current++->as<ArgImmed>().get();
+ BsmSegment seg;
+
+ switch (cmd) {
+ case am_ensure_at_least: {
+ seg.action = BsmSegment::action::ENSURE_AT_LEAST;
+ seg.size = current[0].as<ArgWord>().get();
+ seg.unit = current[1].as<ArgWord>().get();
+ current += 2;
+ break;
+ }
+ case am_ensure_exactly: {
+ seg.action = BsmSegment::action::ENSURE_EXACTLY;
+ seg.size = current[0].as<ArgWord>().get();
+ current += 1;
+ break;
+ }
+ case am_binary:
+ case am_integer: {
+ auto size = current[2].as<ArgWord>().get();
+ auto unit = current[3].as<ArgWord>().get();
+
+ switch (cmd) {
+ case am_integer:
+ seg.action = BsmSegment::action::GET_INTEGER;
+ break;
+ case am_binary:
+ seg.action = BsmSegment::action::GET_BINARY;
+ break;
+ }
+
+ seg.live = current[0];
+ seg.size = size * unit;
+ seg.unit = unit;
+ seg.flags = bs_get_flags(current[1]);
+ seg.dst = current[4].as<ArgRegister>();
+ current += 5;
+ break;
+ }
+ case am_get_tail: {
+ seg.action = BsmSegment::action::GET_TAIL;
+ seg.live = current[0].as<ArgWord>();
+ seg.dst = current[2].as<ArgRegister>();
+ current += 3;
+ break;
+ }
+ case am_skip: {
+ seg.action = BsmSegment::action::SKIP;
+ seg.size = current[0].as<ArgWord>().get();
+ seg.flags = 0;
+ current += 1;
+ break;
+ }
+ case am_Eq: {
+ seg.action = BsmSegment::action::EQ;
+ seg.live = current[0];
+ seg.size = current[1].as<ArgWord>().get();
+ seg.unit = current[2].as<ArgWord>().get();
+ current += 3;
+ break;
+ }
+ default:
+ abort();
+ break;
+ }
+ segments.push_back(seg);
+ }
+
+ segments = opt_bsm_segments(segments, Need, Live);
+
+ const arm::Gp bin_base = ARG2;
+ const arm::Gp bin_position = ARG3;
+ const arm::Gp bin_size = ARG4;
+ const arm::Gp bitdata = ARG8;
+ bool position_is_valid = false;
+
+ for (auto seg : segments) {
+ switch (seg.action) {
+ case BsmSegment::action::ENSURE_AT_LEAST: {
+ comment("ensure_at_least %ld %ld", seg.size, seg.unit);
+ auto ctx_reg = load_source(Ctx, TMP1);
+ auto stride = seg.size;
+ auto unit = seg.unit;
+
+ a.ldur(bin_position, emit_boxed_val(ctx_reg.reg, position_offset));
+ a.ldur(bin_size, emit_boxed_val(ctx_reg.reg, size_offset));
+ a.sub(TMP5, bin_size, bin_position);
+ if (stride != 0) {
+ cmp(TMP5, stride);
+ a.b_lo(resolve_beam_label(Fail, disp1MB));
+ }
+
+ if (unit != 1) {
+ if (stride % unit != 0) {
+ sub(TMP5, TMP5, stride);
+ }
+
+ if ((unit & (unit - 1)) != 0) {
+ mov_imm(TMP4, unit);
+
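+                    /* AArch64 has no integer remainder instruction, so
+                     * compute TMP5 rem unit as TMP5 - (TMP5 / unit) * unit
+                     * using udiv followed by msub. */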
+ a.udiv(TMP3, TMP5, TMP4);
+ a.msub(TMP5, TMP3, TMP4, TMP5);
+
+ a.cbnz(TMP5, resolve_beam_label(Fail, disp1MB));
+ } else {
+ a.tst(TMP5, imm(unit - 1));
+ a.b_ne(resolve_beam_label(Fail, disp1MB));
+ }
+ }
+
+ position_is_valid = true;
+ break;
+ }
+ case BsmSegment::action::ENSURE_EXACTLY: {
+ comment("ensure_exactly %ld", seg.size);
+ auto ctx_reg = load_source(Ctx, TMP1);
+ auto size = seg.size;
+
+ a.ldur(bin_position, emit_boxed_val(ctx_reg.reg, position_offset));
+ a.ldur(TMP3, emit_boxed_val(ctx_reg.reg, size_offset));
+ if (size != 0) {
+ a.sub(TMP1, TMP3, bin_position);
+ cmp(TMP1, size);
+ } else {
+ a.subs(TMP1, TMP3, bin_position);
+ }
+ a.b_ne(resolve_beam_label(Fail, disp1MB));
+ position_is_valid = true;
+ break;
+ }
+ case BsmSegment::action::EQ: {
+ comment("=:= %ld %ld", seg.size, seg.unit);
+ if (seg.size != 0 && seg.size != 64) {
+ a.ror(bitdata, bitdata, imm(64 - seg.size));
+ }
+ if (seg.size == 64) {
+ cmp(bitdata, seg.unit);
+ } else if (seg.size == 32) {
+ cmp(bitdata.w(), seg.unit);
+ } else if (seg.unit == 0) {
+ a.tst(bitdata, imm((1ull << seg.size) - 1));
+ } else {
+ a.and_(TMP1, bitdata, imm((1ull << seg.size) - 1));
+ cmp(TMP1, seg.unit);
+ }
+ a.b_ne(resolve_beam_label(Fail, disp1MB));
+ break;
+ }
+ case BsmSegment::action::TEST_HEAP: {
+ comment("test_heap %ld", seg.size);
+ emit_gc_test(ArgWord(0), ArgWord(seg.size), seg.live);
+ position_is_valid = false;
+ break;
+ }
+ case BsmSegment::action::READ: {
+ comment("read %ld", seg.size);
+ if (seg.size == 0) {
+ comment("(nothing to do)");
+ } else {
+ auto ctx = load_source(Ctx, ARG1);
+
+ if (!position_is_valid) {
+ a.ldur(bin_position,
+ emit_boxed_val(ctx.reg, position_offset));
+ position_is_valid = true;
+ }
+ a.ldur(bin_base, emit_boxed_val(ctx.reg, base_offset));
+
+ emit_read_bits(seg.size, bin_base, bin_position, bitdata);
+
+ a.add(bin_position, bin_position, imm(seg.size));
+ a.stur(bin_position, emit_boxed_val(ctx.reg, position_offset));
+ }
+ break;
+ }
+ case BsmSegment::action::EXTRACT_BINARY: {
+ auto bits = seg.size;
+ auto Dst = seg.dst;
+
+ comment("extract binary %ld", bits);
+ emit_extract_binary(bitdata, bits, Dst);
+ if (bits != 0 && bits != 64) {
+ a.ror(bitdata, bitdata, imm(64 - bits));
+ }
+ break;
+ }
+ case BsmSegment::action::EXTRACT_INTEGER: {
+ auto bits = seg.size;
+ auto flags = seg.flags;
+ auto Dst = seg.dst;
+
+ comment("extract integer %ld", bits);
+ if (bits != 0 && bits != 64) {
+ a.ror(bitdata, bitdata, imm(64 - bits));
+ }
+ emit_extract_integer(bitdata, flags, bits, Dst);
+ break;
+ }
+ case BsmSegment::action::GET_INTEGER: {
+ Uint live = seg.live.as<ArgWord>().get();
+ Uint flags = seg.flags;
+ auto bits = seg.size;
+ auto Dst = seg.dst;
+
+ comment("get integer %ld", bits);
+ auto ctx = load_source(Ctx, TMP1);
+
+ a.mov(ARG1, c_p);
+ a.mov(ARG2, bits);
+ a.mov(ARG3, flags);
+ lea(ARG4, emit_boxed_val(ctx.reg, offsetof(ErlBinMatchState, mb)));
+
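+            /* erts_bs_get_integer_2 needs to allocate heap memory only
+             * when the result may be a bignum, which requires at least
+             * SMALL_BITS bits, so the cheaper runtime entry is used for
+             * narrower segments. */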
+ if (bits >= SMALL_BITS) {
+ emit_enter_runtime<Update::eHeapOnlyAlloc>(live);
+ } else {
+ emit_enter_runtime(live);
+ }
+
+ runtime_call<4>(erts_bs_get_integer_2);
+
+ if (bits >= SMALL_BITS) {
+ emit_leave_runtime<Update::eHeapOnlyAlloc>(live);
+ } else {
+ emit_leave_runtime(live);
+ }
+
+ mov_arg(Dst, ARG1);
+
+ position_is_valid = false;
+ break;
+ }
+ case BsmSegment::action::GET_BINARY: {
+ auto Live = seg.live;
+ comment("get binary %ld", seg.size);
+ auto ctx = load_source(Ctx, TMP1);
+
+ lea(ARG1, arm::Mem(c_p, offsetof(Process, htop)));
+ a.ldur(ARG2, emit_boxed_val(ctx.reg, orig_offset));
+ a.ldur(ARG3, emit_boxed_val(ctx.reg, base_offset));
+ a.ldur(ARG4, emit_boxed_val(ctx.reg, position_offset));
+ mov_imm(ARG5, seg.size);
+ a.add(TMP2, ARG4, ARG5);
+ a.stur(TMP2, emit_boxed_val(ctx.reg, position_offset));
+
+ emit_enter_runtime<Update::eHeapOnlyAlloc>(
+ Live.as<ArgWord>().get());
+
+ runtime_call<5>(erts_extract_sub_binary);
+
+ emit_leave_runtime<Update::eHeapOnlyAlloc>(
+ Live.as<ArgWord>().get());
+
+ mov_arg(seg.dst, ARG1);
+ position_is_valid = false;
+ break;
+ }
+ case BsmSegment::action::GET_TAIL: {
+ comment("get_tail");
+
+ mov_arg(ARG1, Ctx);
+ fragment_call(ga->get_bs_get_tail_shared());
+ mov_arg(seg.dst, ARG1);
+ position_is_valid = false;
+ break;
+ }
+ case BsmSegment::action::SKIP: {
+ comment("skip %ld", seg.size);
+ auto ctx = load_source(Ctx, TMP1);
+ if (!position_is_valid) {
+ a.ldur(bin_position, emit_boxed_val(ctx.reg, position_offset));
+ position_is_valid = true;
+ }
+ add(bin_position, bin_position, seg.size);
+ a.stur(bin_position, emit_boxed_val(ctx.reg, position_offset));
+ break;
+ }
+ case BsmSegment::action::DROP:
+ auto bits = seg.size;
+ comment("drop %ld", bits);
+ if (bits != 0 && bits != 64) {
+ a.ror(bitdata, bitdata, imm(64 - bits));
+ }
+ break;
+ }
+ }
+}