/*
 * %CopyrightBegin%
 *
 * Copyright Ericsson AB 1999-2020. All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * %CopyrightEnd%
 */

#ifdef HAVE_CONFIG_H
#  include "config.h"
#endif

#include "sys.h"
#include "erl_vm.h"
#include "global.h"
#include "erl_process.h"
#include "error.h"
#include "bif.h"
#include "big.h"
#include "erl_bits.h"
#include "erl_binary.h"

/*
 * BIT_ENDIAN_MACHINE is the value of the BSF_LITTLE flag bit that
 * corresponds to the byte order of the machine we are compiled for.
 */
#if defined(WORDS_BIGENDIAN)
# define BIT_ENDIAN_MACHINE 0
#else
# define BIT_ENDIAN_MACHINE BSF_LITTLE
#endif

/* True if the endianness requested in the flags x equals the machine's. */
#define BIT_IS_MACHINE_ENDIAN(x) (((x)&BSF_LITTLE) == BIT_ENDIAN_MACHINE)

/*
 * 16-bit floats: use the compiler's _Float16 when it is two bytes wide,
 * otherwise store the raw bits in a Uint16 and convert through the
 * helpers from erl_bits_f16.h.
 */
#if (SIZEOF__FLOAT16 == 2)
typedef _Float16 erlfp16;
#define FP16_FROM_FP64(x) ((_Float16) x)
#define FP16_TO_FP64(x) ((double) x)
#else
typedef Uint16 erlfp16;
#define FP16_FROM_FP64(x) (fp16_ieee_from_fp32_value((float) x))
#define FP16_TO_FP64(x) ((double) fp16_ieee_to_fp32_value(x))
#include "erl_bits_f16.h"
#endif

/*
 * Here is how many bits we can copy in each reduction.
 *
 * At the time of writing of this comment, CONTEXT_REDS was 4000 and
 * BITS_PER_REDUCTION was 1 KiB (8192 bits). The time for copying an
 * unaligned 4000 KiB binary on my computer (which has a 4,2 GHz Intel
 * i7 CPU) was about 5 ms. The time was approximately 4 times lower if
 * the source and destination binaries were aligned.
 */

#define BITS_PER_REDUCTION (8*1024)

/*
 * MAKE_MASK(n) constructs a mask with n bits.
 * Example: MAKE_MASK(3) returns the binary number 00000111.
*/ #define MAKE_MASK(n) ((((Uint) 1) << (n))-1) /* * MASK_BITS assign src to dst, but preserves the dst bits outside the mask. */ #define MASK_BITS(src,dst,mask) (((src) & (mask)) | ((dst) & ~(mask))) static byte get_bit(byte b, size_t a_offs); /* the state resides in the current process' scheduler data */ #define byte_buf (ErlBitsState.byte_buf_) #define byte_buf_len (ErlBitsState.byte_buf_len_) static erts_atomic_t bits_bufs_size; Uint erts_bits_bufs_size(void) { return (Uint) erts_atomic_read_nob(&bits_bufs_size); } void erts_bits_init_state(ERL_BITS_PROTO_0) { byte_buf_len = 1; byte_buf = erts_alloc(ERTS_ALC_T_BITS_BUF, byte_buf_len); erts_bin_offset = 0; } void erts_bits_destroy_state(ERL_BITS_PROTO_0) { erts_free(ERTS_ALC_T_BITS_BUF, byte_buf); } void erts_init_bits(void) { ERTS_CT_ASSERT(offsetof(Binary,orig_bytes) % 8 == 0); ERTS_CT_ASSERT(offsetof(ErtsMagicBinary,u.aligned.data) % 8 == 0); ERTS_CT_ASSERT(offsetof(ErtsBinary,driver.binary.orig_bytes) == offsetof(Binary,orig_bytes)); erts_atomic_init_nob(&bits_bufs_size, 0); /* erl_process.c calls erts_bits_init_state() on all state instances */ } /***************************************************************** *** *** New matching binaries functions *** *****************************************************************/ #define ReadToVariable(v64, Buffer, x) \ do{ \ int _i; \ v64 = 0; \ for(_i = 0; _i < x; _i++) { \ v64 = ((Uint)Buffer[_i] <<(8*_i)) + v64; \ } \ }while(0) \ Eterm erts_bs_start_match_2(Process *p, Eterm Binary, Uint Max) { Eterm Orig; Uint offs; Uint* hp; Uint NeededSize; ErlBinMatchState *ms; Uint bitoffs; Uint bitsize; Uint total_bin_size; ProcBin* pb; ASSERT(is_binary(Binary)); total_bin_size = binary_size(Binary); ASSERT(total_bin_size <= ERTS_UWORD_MAX / CHAR_BIT); NeededSize = ERL_BIN_MATCHSTATE_SIZE(Max); hp = HeapOnlyAlloc(p, NeededSize); ms = (ErlBinMatchState *) hp; ERTS_GET_REAL_BIN(Binary, Orig, offs, bitoffs, bitsize); pb = (ProcBin *) boxed_val(Orig); if (pb->thing_word == 
HEADER_PROC_BIN && pb->flags != 0) { erts_emasculate_writable_binary(pb); } ms->thing_word = HEADER_BIN_MATCHSTATE(Max); (ms->mb).orig = Orig; (ms->mb).base = binary_bytes(Orig); (ms->mb).offset = ms->save_offset[0] = 8 * offs + bitoffs; (ms->mb).size = total_bin_size * 8 + (ms->mb).offset + bitsize; return make_matchstate(ms); } ErlBinMatchState *erts_bs_start_match_3(Process *p, Eterm Binary) { Eterm Orig; Uint offs; Uint* hp; Uint NeededSize; ErlBinMatchState *ms; Uint bitoffs; Uint bitsize; Uint total_bin_size; ProcBin* pb; ASSERT(is_binary(Binary)); total_bin_size = binary_size(Binary); ASSERT(total_bin_size <= ERTS_UWORD_MAX / CHAR_BIT); NeededSize = ERL_BIN_MATCHSTATE_SIZE(0); hp = HeapOnlyAlloc(p, NeededSize); ms = (ErlBinMatchState *) hp; ERTS_GET_REAL_BIN(Binary, Orig, offs, bitoffs, bitsize); pb = (ProcBin *) boxed_val(Orig); if (pb->thing_word == HEADER_PROC_BIN && pb->flags != 0) { erts_emasculate_writable_binary(pb); } ms->thing_word = HEADER_BIN_MATCHSTATE(0); (ms->mb).orig = Orig; (ms->mb).base = binary_bytes(Orig); (ms->mb).offset = 8 * offs + bitoffs; (ms->mb).size = total_bin_size * 8 + (ms->mb).offset + bitsize; return ms; } #ifdef DEBUG # define CHECK_MATCH_BUFFER(MB) check_match_buffer(MB) static void check_match_buffer(ErlBinMatchBuffer* mb) { Eterm realbin; Uint byteoffs; byte* bytes, bitoffs, bitsz; ProcBin* pb; ERTS_GET_REAL_BIN(mb->orig, realbin, byteoffs, bitoffs, bitsz); bytes = binary_bytes(realbin) + byteoffs; ERTS_ASSERT(mb->base >= bytes && mb->base <= (bytes + binary_size(mb->orig))); pb = (ProcBin *) boxed_val(realbin); if (pb->thing_word == HEADER_PROC_BIN) ERTS_ASSERT(pb->flags == 0); } #else # define CHECK_MATCH_BUFFER(MB) #endif Eterm erts_bs_get_integer_2(Process *p, Uint num_bits, unsigned flags, ErlBinMatchBuffer* mb) { Uint bytes; Uint bits; Uint offs; byte bigbuf[64]; byte* LSB; byte* MSB; Uint* hp; Uint words_needed; Uint actual; Uint v32; int sgn = 0; Eterm res = THE_NON_VALUE; if (num_bits == 0) { return SMALL_ZERO; } 
CHECK_MATCH_BUFFER(mb); if (mb->size - mb->offset < num_bits) { /* Asked for too many bits. */ return THE_NON_VALUE; } /* * Special cases for field sizes up to the size of Uint. */ if (num_bits <= 8-(offs = BIT_OFFSET(mb->offset))) { /* * All bits are in one byte in the binary. We only need * shift them right and mask them. */ Uint b = mb->base[BYTE_OFFSET(mb->offset)]; Uint mask = MAKE_MASK(num_bits); mb->offset += num_bits; b >>= 8 - offs - num_bits; b &= mask; if ((flags & BSF_SIGNED) && b >> (num_bits-1)) { b |= ~mask; } return make_small(b); } else if (num_bits <= 8) { /* * The bits are in two different bytes. It is easiest to * combine the bytes to a word first, and then shift right and * mask to extract the bits. */ Uint byte_offset = BYTE_OFFSET(mb->offset); Uint w = mb->base[byte_offset] << 8 | mb->base[byte_offset+1]; Uint mask = MAKE_MASK(num_bits); mb->offset += num_bits; w >>= 16 - offs - num_bits; w &= mask; if ((flags & BSF_SIGNED) && w >> (num_bits-1)) { w |= ~mask; } return make_small(w); } else if (num_bits < SMALL_BITS && (flags & BSF_LITTLE) == 0) { /* * Handle field sizes from 9 up to SMALL_BITS-1 bits, big-endian, * stored in at least two bytes. */ byte* bp = mb->base + BYTE_OFFSET(mb->offset); Uint n; Uint w; n = num_bits; mb->offset += num_bits; /* * Handle the most signicant byte if it contains 1 to 7 bits. * It only needs to be masked, not shifted. */ if (offs == 0) { w = 0; } else { Uint num_bits_in_msb = 8 - offs; w = *bp++; n -= num_bits_in_msb; w &= MAKE_MASK(num_bits_in_msb); } /* * Simply shift whole bytes into the result. */ switch (BYTE_OFFSET(n)) { #if defined(ARCH_64) case 7: w = (w << 8) | *bp++; case 6: w = (w << 8) | *bp++; case 5: w = (w << 8) | *bp++; case 4: w = (w << 8) | *bp++; #endif case 3: w = (w << 8) | *bp++; case 2: w = (w << 8) | *bp++; case 1: w = (w << 8) | *bp++; } n = BIT_OFFSET(n); /* * Handle the 1 to 7 bits remaining in the last byte (if any). 
* They need to be shifted right, but there is no need to mask; * then they can be shifted into the word. */ if (n > 0) { Uint b = *bp; b >>= 8 - n; w = (w << n) | b; } /* * Sign extend the result if the field type is 'signed' and the * most significant bit is 1. */ if ((flags & BSF_SIGNED) != 0 && (w >> (num_bits-1) != 0)) { w |= ~MAKE_MASK(num_bits); } return make_small(w); } /* * Handle everything else, that is: * * Big-endian fields >= SMALL_BITS (potentially bignums). * Little-endian fields with 9 or more bits. */ bytes = NBYTES(num_bits); if ((bits = BIT_OFFSET(num_bits)) == 0) { /* number of bits in MSB */ bits = 8; } offs = 8 - bits; /* adjusted offset in MSB */ if (bytes <= sizeof bigbuf) { LSB = bigbuf; } else { LSB = erts_alloc(ERTS_ALC_T_TMP, bytes); } MSB = LSB + bytes - 1; /* * Move bits to temporary buffer. We want the buffer to be stored in * little-endian order, since bignums are little-endian. */ if (flags & BSF_LITTLE) { erts_copy_bits(mb->base, mb->offset, 1, LSB, 0, 1, num_bits); *MSB >>= offs; /* adjust msb */ } else { *MSB = 0; erts_copy_bits(mb->base, mb->offset, 1, MSB, offs, -1, num_bits); } mb->offset += num_bits; /* * Get the sign bit. 
*/ sgn = 0; if ((flags & BSF_SIGNED) && (*MSB & (1<<(bits-1)))) { byte* ptr = LSB; byte c = 1; /* sign extend MSB */ *MSB |= ~MAKE_MASK(bits); /* two's complement */ while (ptr <= MSB) { byte pd = ~(*ptr); byte d = pd + c; c = (d < pd); *ptr++ = d; } sgn = 1; } /* normalize */ while ((*MSB == 0) && (MSB > LSB)) { MSB--; bytes--; } /* check for guaranteed small num */ switch (bytes) { case 1: v32 = LSB[0]; goto big_small; case 2: v32 = LSB[0] + (LSB[1]<<8); goto big_small; case 3: v32 = LSB[0] + (LSB[1]<<8) + (LSB[2]<<16); goto big_small; #if !defined(ARCH_64) case 4: v32 = (LSB[0] + (LSB[1]<<8) + (LSB[2]<<16) + (LSB[3]<<24)); if (!IS_USMALL(sgn, v32)) { goto make_big; } #else case 4: ReadToVariable(v32, LSB, 4); goto big_small; case 5: ReadToVariable(v32, LSB, 5); goto big_small; case 6: ReadToVariable(v32, LSB, 6); goto big_small; case 7: ReadToVariable(v32, LSB, 7); goto big_small; case 8: ReadToVariable(v32, LSB, 8); if (!IS_USMALL(sgn, v32)) { goto make_big; } #endif big_small: /* v32 loaded with value which fits in fixnum */ if (sgn) { res = make_small(-((Sint)v32)); } else { res = make_small(v32); } break; make_big: hp = HeapOnlyAlloc(p, BIG_UINT_HEAP_SIZE); if (sgn) { hp[0] = make_neg_bignum_header(1); } else { hp[0] = make_pos_bignum_header(1); } BIG_DIGIT(hp,0) = v32; res = make_big(hp); break; default: words_needed = 1+WSIZE(bytes); hp = HeapOnlyAlloc(p, words_needed); res = bytes_to_big(LSB, bytes, sgn, hp); if (is_nil(res)) { p->htop = hp; res = THE_NON_VALUE; } else if (is_small(res)) { p->htop = hp; } else if ((actual = bignum_header_arity(*hp)+1) < words_needed) { p->htop = hp + actual; } break; } if (LSB != bigbuf) { erts_free(ERTS_ALC_T_TMP, (void *) LSB); } return res; } Eterm erts_bs_get_binary_2(Process *p, Uint num_bits, unsigned flags, ErlBinMatchBuffer* mb) { Eterm result; CHECK_MATCH_BUFFER(mb); if (mb->size - mb->offset < num_bits) { /* Asked for too many bits. */ return THE_NON_VALUE; } /* * From now on, we can't fail. 
*/ result = erts_extract_sub_binary(&HEAP_TOP(p), mb->orig, mb->base, mb->offset, num_bits); mb->offset += num_bits; return result; } Eterm erts_bs_get_float_2(Process *p, Uint num_bits, unsigned flags, ErlBinMatchBuffer* mb) { Eterm* hp; erlfp16 f16; float f32; double f64; byte* fptr; FloatDef f; CHECK_MATCH_BUFFER(mb); if (num_bits == 0) { f.fd = 0.0; hp = HeapOnlyAlloc(p, FLOAT_SIZE_OBJECT); PUT_DOUBLE(f, hp); return make_float(hp); } if (mb->size - mb->offset < num_bits) { /* Asked for too many bits. */ return THE_NON_VALUE; } if (num_bits == 16) { fptr = (byte *) &f16; } else if (num_bits == 32) { fptr = (byte *) &f32; } else if (num_bits == 64) { fptr = (byte *) &f64; } else { return THE_NON_VALUE; } if (BIT_IS_MACHINE_ENDIAN(flags)) { erts_copy_bits(mb->base, mb->offset, 1, fptr, 0, 1, num_bits); } else { erts_copy_bits(mb->base, mb->offset, 1, fptr + NBYTES(num_bits) - 1, 0, -1, num_bits); } ERTS_FP_CHECK_INIT(p); if (num_bits == 16) { f.fd = FP16_TO_FP64(f16); ERTS_FP_ERROR_THOROUGH(p, f.fd, return THE_NON_VALUE); } else if (num_bits == 32) { ERTS_FP_ERROR_THOROUGH(p, f32, return THE_NON_VALUE); f.fd = f32; } else { #ifdef DOUBLE_MIDDLE_ENDIAN FloatDef ftmp; ftmp.fd = f64; f.fw[0] = ftmp.fw[1]; f.fw[1] = ftmp.fw[0]; ERTS_FP_ERROR_THOROUGH(p, f.fd, return THE_NON_VALUE); #else ERTS_FP_ERROR_THOROUGH(p, f64, return THE_NON_VALUE); f.fd = f64; #endif } mb->offset += num_bits; hp = HeapOnlyAlloc(p, FLOAT_SIZE_OBJECT); PUT_DOUBLE(f, hp); return make_float(hp); } Eterm erts_bs_get_binary_all_2(Process *p, ErlBinMatchBuffer* mb) { Uint bit_size; Eterm result; CHECK_MATCH_BUFFER(mb); bit_size = mb->size - mb->offset; result = erts_extract_sub_binary(&HEAP_TOP(p), mb->orig, mb->base, mb->offset, bit_size); mb->offset = mb->size; return result; } /**************************************************************** *** *** Building binaries *** ****************************************************************/ /* COPY_VAL: * copy sz byte from val to dst buffer, * dst, 
val are updated!!! */ #define COPY_VAL(dst,ddir,val,sz) do { \ Uint __sz = (sz); \ while(__sz) { \ switch(__sz) { \ default: \ case 4: *dst = (val&0xff); dst += ddir; val >>= 8; __sz--; \ case 3: *dst = (val&0xff); dst += ddir; val >>= 8; __sz--; \ case 2: *dst = (val&0xff); dst += ddir; val >>= 8; __sz--; \ case 1: *dst = (val&0xff); dst += ddir; val >>= 8; __sz--; \ } \ } \ } while(0) /* calculate a - *cp (carry) (store result in b), *cp is updated! */ #define SUBc(a, cp, b) do { \ byte __x = (a); \ byte __y = (__x - (*(cp))); \ (*cp) = (__y > __x); \ *(b) = ~__y; \ } while(0) static int fmt_int(byte *buf, Uint sz, Eterm val, Uint size, Uint flags) { unsigned long offs; offs = BIT_OFFSET(size); if (is_small(val)) { Sint v = signed_val(val); ASSERT(size != 0); /* Tested by caller */ if (flags & BSF_LITTLE) { /* Little endian */ sz--; COPY_VAL(buf,1,v,sz); *buf = offs ? ((v << (8-offs)) & 0xff) : (v & 0xff); } else { /* Big endian */ buf += (sz - 1); if (offs) { *buf-- = (v << (8-offs)) & 0xff; sz--; v >>= offs; } COPY_VAL(buf,-1,v,sz); } } else if (is_big(val)) { int sign = big_sign(val); Uint ds = big_size(val)*sizeof(ErtsDigit); /* number of digits bytes */ ErtsDigit* dp = big_v(val); int n = MIN(sz,ds); if (size == 0) { return 0; } if (flags & BSF_LITTLE) { sz -= n; /* pad with this amount */ if (sign) { int c = 1; while(n >= sizeof(ErtsDigit)) { ErtsDigit d = *dp++; int i; for(i = 0; i < sizeof(ErtsDigit); ++i) { SUBc((d&0xff), &c, buf); buf++; d >>= 8; } n -= sizeof(ErtsDigit); } if (n) { ErtsDigit d = *dp; do { SUBc((d&0xff), &c, buf); buf++; d >>= 8; } while (--n > 0); } /* pad */ while(sz--) { SUBc(0, &c, buf); buf++; } } else { while(n >= sizeof(ErtsDigit)) { ErtsDigit d = *dp++; int i; for(i = 0; i < sizeof(ErtsDigit); ++i) { *buf++ = (d & 0xff); d >>= 8; } n -= sizeof(ErtsDigit); } if (n) { ErtsDigit d = *dp; do { *buf++ = (d & 0xff); d >>= 8; } while (--n > 0); } /* pad */ while(sz) { *buf++ = 0; sz--; } } /* adjust MSB!!! 
*/ if (offs) { buf--; *buf <<= (8 - offs); } } else { /* BIG ENDIAN */ ErtsDigit acc = 0; ErtsDigit d; buf += (sz - 1); /* end of buffer */ sz -= n; /* pad with this amount */ offs = offs ? (8-offs) : 0; /* shift offset */ if (sign) { /* SIGNED */ int c = 1; while (n >= sizeof(ErtsDigit)) { int i; d = *dp++; acc |= d << offs; SUBc((acc&0xff), &c, buf); buf--; acc = d >> (8-offs); for (i = 0; i < sizeof(ErtsDigit)-1; ++i) { SUBc((acc&0xff), &c, buf); buf--; acc >>= 8; } n -= sizeof(ErtsDigit); } if (n) { acc |= ((ErtsDigit)*dp << offs); do { SUBc((acc & 0xff), &c, buf); buf--; acc >>= 8; } while (--n > 0); } /* pad */ while(sz--) { SUBc((acc & 0xff), &c, buf); buf--; acc >>= 8; } } else { /* UNSIGNED */ while (n >= sizeof(ErtsDigit)) { int i; d = *dp++; acc |= d << offs; *buf-- = acc; acc = d >> (8-offs); for (i = 0; i < sizeof(ErtsDigit)-1; ++i) { *buf-- = acc; acc >>= 8; } n -= sizeof(ErtsDigit); } if (n) { acc |= ((ErtsDigit)*dp << offs); do { *buf-- = acc & 0xff; acc >>= 8; } while (--n > 0); } while (sz--) { *buf-- = acc & 0xff; acc >>= 8; } } } } else { /* Neither small nor big */ return -1; } return 0; } static void ERTS_INLINE need_byte_buf(ERL_BITS_PROTO_1(int need)) { if (byte_buf_len < need) { erts_atomic_add_nob(&bits_bufs_size, need - byte_buf_len); byte_buf_len = need; byte_buf = erts_realloc(ERTS_ALC_T_BITS_BUF, byte_buf, byte_buf_len); } } int erts_new_bs_put_integer(ERL_BITS_PROTO_3(Eterm arg, Uint num_bits, unsigned flags)) { Uint bin_offset = erts_bin_offset; Uint bit_offset; Uint b; byte *iptr; bit_offset = BIT_OFFSET(bin_offset); if (is_small(arg)) { Uint rbits = 8 - bit_offset; if (num_bits == 0) { return 1; } else if (bit_offset + num_bits <= 8) { /* * All bits are in the same byte. */ iptr = erts_current_bin+BYTE_OFFSET(bin_offset); b = *iptr & (0xff << rbits); b |= (signed_val(arg) & ((1 << num_bits)-1)) << (8-bit_offset-num_bits); *iptr = b; } else if (bit_offset == 0) { /* * More than one bit, starting at a byte boundary. 
* That will be quite efficiently handled by fmt_int(). * * (We know that fmt_int() can't fail here.) */ (void) fmt_int(erts_current_bin+BYTE_OFFSET(bin_offset), NBYTES(num_bits), arg, num_bits, flags); } else if (flags & BSF_LITTLE) { /* * Can't handle unaligned little-endian in a simple way. */ goto unaligned; } else { /* Big endian */ /* * Big-endian, more than one byte, but not aligned on a byte boundary. * Handle the bits up to the next byte boundary specially, * then let fmt_int() handle the rest. */ Uint shift_count = num_bits - rbits; Sint val = signed_val(arg); iptr = erts_current_bin+BYTE_OFFSET(bin_offset); b = *iptr & (0xff << rbits); /* * Shifting with a shift count greater than or equal to the word * size may be a no-op (instead of 0 the result may be the unshifted * value). Therefore, only do the shift and the OR if the shift count * is less than the word size if the number is positive; if negative, * we must simulate the sign extension. */ if (shift_count < sizeof(Uint)*8) { b |= (val >> shift_count) & ((1 << rbits) - 1); } else if (val < 0) { /* Simulate sign extension. */ b |= (-1) & ((1 << rbits) - 1); } *iptr++ = b; /* fmt_int() can't fail here. */ (void) fmt_int(iptr, NBYTES(num_bits-rbits), arg, num_bits-rbits, flags); } } else if (bit_offset == 0) { /* * Big number, aligned on a byte boundary. We can format the * integer directly into the binary. */ if (fmt_int(erts_current_bin+BYTE_OFFSET(bin_offset), NBYTES(num_bits), arg, num_bits, flags) < 0) { return 0; } } else { unaligned: /* * Big number or small little-endian number, not byte-aligned, * or not a number at all. * * We must format the number into a temporary buffer, and then * copy that into the binary. 
*/ need_byte_buf(ERL_BITS_ARGS_1(NBYTES(num_bits))); iptr = byte_buf; if (fmt_int(iptr, NBYTES(num_bits), arg, num_bits, flags) < 0) { return 0; } erts_copy_bits(iptr, 0, 1, erts_current_bin, bin_offset, 1, num_bits); } erts_bin_offset = bin_offset + num_bits; return 1; } int erts_bs_put_utf8(ERL_BITS_PROTO_1(Eterm arg)) { Uint bin_offset = erts_bin_offset; Uint bit_offset; Uint num_bits; byte tmp_buf[4]; byte* dst; Sint val; if (is_not_small(arg)) { return 0; } val = signed_val(arg); if (val < 0) { return 0; } if ((bit_offset = BIT_OFFSET(bin_offset)) == 0) { /* We can write directly into the destination binary. */ dst = erts_current_bin+BYTE_OFFSET(bin_offset); } else { /* Unaligned destination binary. Must use a temporary buffer. */ dst = tmp_buf; } if (val < 0x80) { dst[0] = val; num_bits = 8; } else if (val < 0x800) { dst[0] = 0xC0 | (val >> 6); dst[1] = 0x80 | (val & 0x3F); num_bits = 16; } else if (val < 0x10000UL) { if (0xD800 <= val && val <= 0xDFFF) { return 0; } dst[0] = 0xE0 | (val >> 12); dst[1] = 0x80 | ((val >> 6) & 0x3F); dst[2] = 0x80 | (val & 0x3F); num_bits = 24; } else if (val < 0x110000) { dst[0] = 0xF0 | (val >> 18); dst[1] = 0x80 | ((val >> 12) & 0x3F); dst[2] = 0x80 | ((val >> 6) & 0x3F); dst[3] = 0x80 | (val & 0x3F); num_bits = 32; } else { return 0; } if (bin_offset != 0) { erts_copy_bits(dst, 0, 1, erts_current_bin, bin_offset, 1, num_bits); } erts_bin_offset += num_bits; return 1; } int erts_bs_put_utf16(ERL_BITS_PROTO_2(Eterm arg, Uint flags)) { Uint bin_offset = erts_bin_offset; Uint bit_offset; Uint num_bits; byte tmp_buf[4]; byte* dst; Uint val; if (is_not_small(arg)) { return 0; } val = unsigned_val(arg); if (val > 0x10FFFF || (0xD800 <= val && val <= 0xDFFF)) { return 0; } if ((bit_offset = BIT_OFFSET(bin_offset)) == 0) { /* We can write directly into the destination binary. */ dst = erts_current_bin+BYTE_OFFSET(bin_offset); } else { /* Unaligned destination binary. Must use a temporary buffer. 
*/ dst = tmp_buf; } if (val < 0x10000UL) { num_bits = 16; if (flags & BSF_LITTLE) { dst[0] = val; dst[1] = val >> 8; } else { dst[0] = val >> 8; dst[1] = val; } } else { Uint16 w1, w2; num_bits = 32; val = val - 0x10000UL; w1 = 0xD800 | (val >> 10); w2 = 0xDC00 | (val & 0x3FF); if (flags & BSF_LITTLE) { dst[0] = w1; dst[1] = w1 >> 8; dst[2] = w2; dst[3] = w2 >> 8; } else { dst[0] = w1 >> 8; dst[1] = w1; dst[2] = w2 >> 8; dst[3] = w2; } } if (bin_offset != 0) { erts_copy_bits(dst, 0, 1, erts_current_bin, bin_offset, 1, num_bits); } erts_bin_offset += num_bits; return 1; } int erts_new_bs_put_binary(Process *c_p, Eterm arg, Uint num_bits) { byte *bptr; Uint bitoffs; Uint bitsize; ERL_BITS_DEFINE_STATEP(c_p); if (!is_binary(arg)) { return 0; } ERTS_GET_BINARY_BYTES(arg, bptr, bitoffs, bitsize); if (num_bits > 8*binary_size(arg)+bitsize) { return 0; } copy_binary_to_buffer(erts_current_bin, erts_bin_offset, bptr, bitoffs, num_bits); erts_bin_offset += num_bits; BUMP_REDS(c_p, num_bits / BITS_PER_REDUCTION); return 1; } int erts_new_bs_put_binary_all(Process *c_p, Eterm arg, Uint unit) { byte *bptr; Uint bitoffs; Uint bitsize; Uint num_bits; ERL_BITS_DEFINE_STATEP(c_p); /* * This type test is not needed if the code was compiled with * an R12B or later compiler, since there would have been a * call to bit_size/1 or byte_size/1 that would have failed if * 'arg' was not a binary. However, in R11B and earlier releases, * size/1 was use for calculating the size of the binary, and * therefore 'arg' could be a tuple. 
*/ if (!is_binary(arg)) { return 0; } ERTS_GET_BINARY_BYTES(arg, bptr, bitoffs, bitsize); num_bits = 8*binary_size(arg)+bitsize; if (unit == 8) { if (bitsize != 0) { return 0; } } else if (unit != 1 && num_bits % unit != 0) { return 0; } copy_binary_to_buffer(erts_current_bin, erts_bin_offset, bptr, bitoffs, num_bits); erts_bin_offset += num_bits; BUMP_REDS(c_p, num_bits / BITS_PER_REDUCTION); return 1; } int erts_new_bs_put_float(Process *c_p, Eterm arg, Uint num_bits, int flags) { ERL_BITS_DEFINE_STATEP(c_p); if (BIT_OFFSET(erts_bin_offset) == 0) { Uint32 a; Uint32 b; if (num_bits == 64) { union { double f64; Uint32 i32[2]; } u; if (is_float(arg)) { FloatDef *fdp = (FloatDef*)(float_val(arg) + 1); #ifdef DOUBLE_MIDDLE_ENDIAN a = fdp->fw[1]; b = fdp->fw[0]; #else a = fdp->fw[0]; b = fdp->fw[1]; #endif } else if (is_small(arg)) { u.f64 = (double) signed_val(arg); #ifdef DOUBLE_MIDDLE_ENDIAN a = u.i32[1]; b = u.i32[0]; #else a = u.i32[0]; b = u.i32[1]; #endif } else if (is_big(arg)) { if (big_to_double(arg, &u.f64) < 0) { return 0; } #ifdef DOUBLE_MIDDLE_ENDIAN a = u.i32[1]; b = u.i32[0]; #else a = u.i32[0]; b = u.i32[1]; #endif } else { return 0; } } else if (num_bits == 32) { union { float f32; Uint32 i32; } u; b = 0; if (is_float(arg)) { FloatDef f; GET_DOUBLE(arg, f); ERTS_FP_CHECK_INIT(c_p); u.f32 = f.fd; ERTS_FP_ERROR(c_p,u.f32,;); a = u.i32; } else if (is_small(arg)) { u.f32 = (float) signed_val(arg); a = u.i32; } else if (is_big(arg)) { double f64; if (big_to_double(arg, &f64) < 0) { return 0; } ERTS_FP_CHECK_INIT(c_p); u.f32 = (float) f64; ERTS_FP_ERROR(c_p,u.f32,;); a = u.i32; } else { return 0; } } else if (num_bits == 16) { union { erlfp16 f16; Uint16 i16; } u; b = 0; if (is_float(arg)) { FloatDef f; GET_DOUBLE(arg, f); ERTS_FP_CHECK_INIT(c_p); ERTS_FP_ERROR(c_p,f.fd,;); u.f16 = FP16_FROM_FP64(f.fd); a = u.i16; } else if (is_small(arg)) { u.f16 = FP16_FROM_FP64(signed_val(arg)); a = u.i16; } else if (is_big(arg)) { double f64; if (big_to_double(arg, 
&f64) < 0) { return 0; } ERTS_FP_CHECK_INIT(c_p); ERTS_FP_ERROR(c_p,f64,;); u.f16 = FP16_FROM_FP64(f64); a = u.i16; } else { return 0; } } else { return 0; } if (BIT_IS_MACHINE_ENDIAN(flags)) { byte* t = erts_current_bin+BYTE_OFFSET(erts_bin_offset); #ifdef WORDS_BIGENDIAN if (num_bits == 16) { t[0] = a >> 8; t[1] = a; } else if (num_bits >= 32) { t[0] = a >> 24; t[1] = a >> 16; t[2] = a >> 8; t[3] = a; if (num_bits == 64) { t[4] = b >> 24; t[5] = b >> 16; t[6] = b >> 8; t[7] = b; } } #else if (num_bits >= 32) { t[3] = a >> 24; t[2] = a >> 16; } t[1] = a >> 8; t[0] = a; if (num_bits == 64) { t[7] = b >> 24; t[6] = b >> 16; t[5] = b >> 8; t[4] = b; } #endif } else { byte* t = erts_current_bin+BYTE_OFFSET(erts_bin_offset) + NBYTES(num_bits); #ifdef WORDS_BIGENDIAN if (num_bits == 16) { t[-1] = a >> 8; t[-2] = a; } else if (num_bits >= 32) { t[-1] = a >> 24; t[-2] = a >> 16; t[-3] = a >> 8; t[-4] = a; if (num_bits == 64) { t[-5] = b >> 24; t[-6] = b >> 16; t[-7] = b >> 8; t[-8] = b; } } #else t[-1] = a; t[-2] = a >> 8; if (num_bits >= 32) { t[-3] = a >> 16; t[-4] = a >> 24; } if (num_bits == 64) { t[-5] = b; t[-6] = b >> 8; t[-7] = b >> 16; t[-8] = b >> 24; } #endif } } else { byte *bptr; double f64; float f32; erlfp16 f16; #ifdef DOUBLE_MIDDLE_ENDIAN FloatDef fbuf, ftmp; #endif if (num_bits == 64) { if (is_float(arg)) { #ifdef DOUBLE_MIDDLE_ENDIAN FloatDef *fdp = (FloatDef*)(float_val(arg) + 1); ftmp = *fdp; #else bptr = (byte *) (float_val(arg) + 1); #endif } else if (is_small(arg)) { f64 = (double) signed_val(arg); #ifdef DOUBLE_MIDDLE_ENDIAN ftmp.fd = f64; #else bptr = (byte *) &f64; #endif } else if (is_big(arg)) { if (big_to_double(arg, &f64) < 0) { return 0; } #ifdef DOUBLE_MIDDLE_ENDIAN ftmp.fd = f64; #else bptr = (byte *) &f64; #endif } else { return 0; } #ifdef DOUBLE_MIDDLE_ENDIAN fbuf.fw[0] = ftmp.fw[1]; fbuf.fw[1] = ftmp.fw[0]; bptr = fbuf.fb; #endif } else if (num_bits == 32) { if (is_float(arg)) { FloatDef f; GET_DOUBLE(arg, f); ERTS_FP_CHECK_INIT(c_p); 
f32 = f.fd; ERTS_FP_ERROR(c_p,f32,;); bptr = (byte *) &f32; } else if (is_small(arg)) { f32 = (float) signed_val(arg); bptr = (byte *) &f32; } else if (is_big(arg)) { if (big_to_double(arg, &f64) < 0) { return 0; } ERTS_FP_CHECK_INIT(c_p); f32 = (float) f64; ERTS_FP_ERROR(c_p,f32,;); bptr = (byte *) &f32; } else { return 0; } } else if (num_bits == 16) { if (is_float(arg)) { FloatDef f; GET_DOUBLE(arg, f); ERTS_FP_CHECK_INIT(c_p); ERTS_FP_ERROR(c_p,f.fd,;); f16 = FP16_FROM_FP64(f.fd); bptr = (byte *) &f16; } else if (is_small(arg)) { f16 = FP16_FROM_FP64(signed_val(arg)); bptr = (byte *) &f16; } else if (is_big(arg)) { if (big_to_double(arg, &f64) < 0) { return 0; } ERTS_FP_CHECK_INIT(c_p); ERTS_FP_ERROR(c_p,f64,;); f16 = FP16_FROM_FP64(f64); bptr = (byte *) &f16; } else { return 0; } } else { return 0; } if (BIT_IS_MACHINE_ENDIAN(flags)) { erts_copy_bits(bptr, 0, 1, erts_current_bin, erts_bin_offset, 1, num_bits); } else { erts_copy_bits(bptr+NBYTES(num_bits)-1, 0, -1, erts_current_bin, erts_bin_offset, 1, num_bits); } } erts_bin_offset += num_bits; return 1; } void erts_new_bs_put_string(ERL_BITS_PROTO_2(byte* iptr, Uint num_bytes)) { if (BIT_OFFSET(erts_bin_offset) != 0) { erts_copy_bits(iptr, 0, 1, erts_current_bin, erts_bin_offset, 1, num_bytes*8); } else { sys_memcpy(erts_current_bin+BYTE_OFFSET(erts_bin_offset), iptr, num_bytes); } erts_bin_offset += num_bytes*8; } Eterm erts_bs_append(Process* c_p, Eterm* reg, Uint live, Eterm build_size_term, Uint extra_words, Uint unit) { Eterm bin; /* Given binary */ Eterm* ptr; Eterm hdr; ErlSubBin* sb; ProcBin* pb; Binary* binp; Uint heap_need; Uint build_size_in_bits; Uint used_size_in_bits; Uint unsigned_bits; ERL_BITS_DEFINE_STATEP(c_p); /* * Check and untag the requested build size. 
*/ if (is_small(build_size_term)) { Sint signed_bits = signed_val(build_size_term); if (signed_bits < 0) { goto badarg; } build_size_in_bits = (Uint) signed_bits; } else if (term_to_Uint(build_size_term, &unsigned_bits)) { build_size_in_bits = unsigned_bits; } else { c_p->freason = unsigned_bits; return THE_NON_VALUE; } /* * Check the binary argument. */ bin = reg[live]; if (!is_boxed(bin)) { badarg: c_p->freason = BADARG; return THE_NON_VALUE; } ptr = boxed_val(bin); hdr = *ptr; if (!is_binary_header(hdr)) { goto badarg; } if (hdr != HEADER_SUB_BIN) { goto not_writable; } sb = (ErlSubBin *) ptr; if (!sb->is_writable) { goto not_writable; } pb = (ProcBin *) boxed_val(sb->orig); ASSERT(pb->thing_word == HEADER_PROC_BIN); if ((pb->flags & PB_IS_WRITABLE) == 0) { goto not_writable; } /* * OK, the binary is writable. */ erts_bin_offset = 8*sb->size + sb->bitsize; if (unit > 1) { if ((unit == 8 && (erts_bin_offset & 7) != 0) || (erts_bin_offset % unit) != 0) { goto badarg; } } if (build_size_in_bits == 0) { if (HeapWordsLeft(c_p) < extra_words) { (void) erts_garbage_collect(c_p, extra_words, reg, live+1); bin = reg[live]; } return bin; } if((ERTS_UINT_MAX - build_size_in_bits) < erts_bin_offset) { c_p->freason = SYSTEM_LIMIT; return THE_NON_VALUE; } used_size_in_bits = erts_bin_offset + build_size_in_bits; sb->is_writable = 0; /* Make sure that no one else can write. */ pb->size = NBYTES(used_size_in_bits); pb->flags |= PB_ACTIVE_WRITER; /* * Reallocate the binary if it is too small. */ binp = pb->val; if (binp->orig_size < pb->size) { Uint new_size = 2*pb->size; binp = erts_bin_realloc(binp, new_size); pb->val = binp; pb->bytes = (byte *) binp->orig_bytes; BUMP_REDS(c_p, pb->size / BITS_PER_REDUCTION); } erts_current_bin = pb->bytes; /* * Allocate heap space and build a new sub binary. 
*/ reg[live] = sb->orig; heap_need = ERL_SUB_BIN_SIZE + extra_words; if (HeapWordsLeft(c_p) < heap_need) { (void) erts_garbage_collect(c_p, heap_need, reg, live+1); } sb = (ErlSubBin *) c_p->htop; c_p->htop += ERL_SUB_BIN_SIZE; sb->thing_word = HEADER_SUB_BIN; sb->size = BYTE_OFFSET(used_size_in_bits); sb->bitsize = BIT_OFFSET(used_size_in_bits); sb->offs = 0; sb->bitoffs = 0; sb->is_writable = 1; sb->orig = reg[live]; return make_binary(sb); /* * The binary is not writable. We must create a new writable binary and * copy the old contents of the binary. */ not_writable: { Uint used_size_in_bytes; /* Size of old binary + data to be built */ Uint bin_size; Binary* bptr; byte* src_bytes; Uint bitoffs; Uint bitsize; Eterm* hp; /* * Allocate heap space. */ heap_need = PROC_BIN_SIZE + ERL_SUB_BIN_SIZE + extra_words; if (HeapWordsLeft(c_p) < heap_need) { (void) erts_garbage_collect(c_p, heap_need, reg, live+1); bin = reg[live]; } hp = c_p->htop; /* * Calculate sizes. The size of the new binary, is the sum of the * build size and the size of the old binary. Allow some room * for growing. */ ERTS_GET_BINARY_BYTES(bin, src_bytes, bitoffs, bitsize); erts_bin_offset = 8*binary_size(bin) + bitsize; if (unit > 1) { if ((unit == 8 && (erts_bin_offset & 7) != 0) || (erts_bin_offset % unit) != 0) { goto badarg; } } if (build_size_in_bits == 0) { return bin; } if((ERTS_UINT_MAX - build_size_in_bits) < erts_bin_offset) { c_p->freason = SYSTEM_LIMIT; return THE_NON_VALUE; } used_size_in_bits = erts_bin_offset + build_size_in_bits; used_size_in_bytes = NBYTES(used_size_in_bits); if(used_size_in_bits < (ERTS_UINT_MAX / 2)) { bin_size = 2 * used_size_in_bytes; } else { bin_size = NBYTES(ERTS_UINT_MAX); } bin_size = (bin_size < 256) ? 256 : bin_size; /* * Allocate the binary data struct itself. */ bptr = erts_bin_nrml_alloc(bin_size); erts_current_bin = (byte *) bptr->orig_bytes; /* * Now allocate the ProcBin on the heap. 
*/ pb = (ProcBin *) hp; hp += PROC_BIN_SIZE; pb->thing_word = HEADER_PROC_BIN; pb->size = used_size_in_bytes; pb->next = MSO(c_p).first; MSO(c_p).first = (struct erl_off_heap_header*)pb; pb->val = bptr; pb->bytes = (byte*) bptr->orig_bytes; pb->flags = PB_IS_WRITABLE | PB_ACTIVE_WRITER; OH_OVERHEAD(&(MSO(c_p)), pb->size / sizeof(Eterm)); /* * Now allocate the sub binary and set its size to include the * data about to be built. */ sb = (ErlSubBin *) hp; hp += ERL_SUB_BIN_SIZE; sb->thing_word = HEADER_SUB_BIN; sb->size = BYTE_OFFSET(used_size_in_bits); sb->bitsize = BIT_OFFSET(used_size_in_bits); sb->offs = 0; sb->bitoffs = 0; sb->is_writable = 1; sb->orig = make_binary(pb); c_p->htop = hp; /* * Now copy the data into the binary. */ copy_binary_to_buffer(erts_current_bin, 0, src_bytes, bitoffs, erts_bin_offset); BUMP_REDS(c_p, erts_bin_offset / BITS_PER_REDUCTION); return make_binary(sb); } } Eterm erts_bs_private_append(Process* p, Eterm bin, Eterm build_size_term, Uint unit) { Eterm* ptr; ErlSubBin* sb; ProcBin* pb; Binary* binp; Uint build_size_in_bits; Uint pos_in_bits_after_build; Uint unsigned_bits; ERL_BITS_DEFINE_STATEP(p); /* * Check and untag the requested build size. */ if (is_small(build_size_term)) { Sint signed_bits = signed_val(build_size_term); if (signed_bits < 0) { p->freason = BADARG; return THE_NON_VALUE; } build_size_in_bits = (Uint) signed_bits; } else if (term_to_Uint(build_size_term, &unsigned_bits)) { build_size_in_bits = unsigned_bits; } else { p->freason = unsigned_bits; return THE_NON_VALUE; } ptr = boxed_val(bin); ASSERT(*ptr == HEADER_SUB_BIN); sb = (ErlSubBin *) ptr; ASSERT(sb->is_writable); pb = (ProcBin *) boxed_val(sb->orig); ASSERT(pb->thing_word == HEADER_PROC_BIN); /* * Calculate new size in bytes. 
*/ erts_bin_offset = 8*sb->size + sb->bitsize; if((ERTS_UINT_MAX - build_size_in_bits) < erts_bin_offset) { p->freason = SYSTEM_LIMIT; return THE_NON_VALUE; } pos_in_bits_after_build = erts_bin_offset + build_size_in_bits; pb->size = (pos_in_bits_after_build+7) >> 3; pb->flags |= PB_ACTIVE_WRITER; /* * Reallocate the binary if it is too small. */ binp = pb->val; if (binp->orig_size < pb->size) { Uint new_size = 2*pb->size; BUMP_REDS(p, pb->size / BITS_PER_REDUCTION); if (pb->flags & PB_IS_WRITABLE) { /* * This is the normal case - the binary is writable. * There are no other references to the binary, so it * is safe to reallocate it. */ binp = erts_bin_realloc(binp, new_size); pb->val = binp; pb->bytes = (byte *) binp->orig_bytes; } else { /* * The binary is NOT writable. The only way that is * supposed to happen if is call trace has been turned * on. That means that a trace process now has (or have * had) a reference to the binary, so we are not allowed * to reallocate the binary. Instead, we must allocate a new * binary and copy the contents of the old binary into it. */ Binary* bptr = erts_bin_nrml_alloc(new_size); sys_memcpy(bptr->orig_bytes, binp->orig_bytes, binp->orig_size); pb->flags |= PB_IS_WRITABLE | PB_ACTIVE_WRITER; pb->val = bptr; pb->bytes = (byte *) bptr->orig_bytes; erts_bin_release(binp); } } erts_current_bin = pb->bytes; sb->size = pos_in_bits_after_build >> 3; sb->bitsize = pos_in_bits_after_build & 7; return bin; } Eterm erts_bs_init_writable(Process* p, Eterm sz) { Uint bin_size = 1024; Uint heap_need; Binary* bptr; ProcBin* pb; ErlSubBin* sb; Eterm* hp; if (is_small(sz)) { Sint s = signed_val(sz); if (s >= 0) { bin_size = (Uint) s; } } /* * Allocate heap space. */ heap_need = PROC_BIN_SIZE + ERL_SUB_BIN_SIZE; if (HeapWordsLeft(p) < heap_need) { (void) erts_garbage_collect(p, heap_need, NULL, 0); } hp = p->htop; /* * Allocate the binary data struct itself. */ bptr = erts_bin_nrml_alloc(bin_size); /* * Now allocate the ProcBin on the heap. 
*/ pb = (ProcBin *) hp; hp += PROC_BIN_SIZE; pb->thing_word = HEADER_PROC_BIN; pb->size = 0; pb->next = MSO(p).first; MSO(p).first = (struct erl_off_heap_header*) pb; pb->val = bptr; pb->bytes = (byte*) bptr->orig_bytes; pb->flags = PB_IS_WRITABLE | PB_ACTIVE_WRITER; OH_OVERHEAD(&(MSO(p)), pb->size / sizeof(Eterm)); /* * Now allocate the sub binary. */ sb = (ErlSubBin *) hp; hp += ERL_SUB_BIN_SIZE; sb->thing_word = HEADER_SUB_BIN; sb->size = 0; sb->offs = 0; sb->bitsize = 0; sb->bitoffs = 0; sb->is_writable = 1; sb->orig = make_binary(pb); p->htop = hp; return make_binary(sb); } void erts_emasculate_writable_binary(ProcBin* pb) { Binary* binp; Uint unused; pb->flags = 0; binp = pb->val; ASSERT(binp->orig_size >= pb->size); unused = binp->orig_size - pb->size; /* Our allocators are 8 byte aligned, i.e., shrinking with less than 8 bytes will have no real effect */ if (unused >= 8) { binp = erts_bin_realloc(binp, pb->size); pb->val = binp; pb->bytes = (byte *) binp->orig_bytes; } } Uint32 erts_bs_get_unaligned_uint32(ErlBinMatchBuffer* mb) { Uint bytes; Uint offs; byte bigbuf[4]; byte* LSB; byte* MSB; CHECK_MATCH_BUFFER(mb); ASSERT((mb->offset & 7) != 0); ASSERT(mb->size - mb->offset >= 32); bytes = 4; offs = 0; LSB = bigbuf; MSB = LSB + bytes - 1; *MSB = 0; erts_copy_bits(mb->base, mb->offset, 1, MSB, offs, -1, 32); return LSB[0] | (LSB[1]<<8) | (LSB[2]<<16) | (LSB[3]<<24); } static void erts_align_utf8_bytes(ErlBinMatchBuffer* mb, byte* buf) { Uint bits = mb->size - mb->offset; /* * Copy up to 4 bytes into the supplied buffer. */ ASSERT(bits >= 8); if (bits <= 15) { bits = 8; } else if (bits >= 32) { bits = 32; } else if (bits >= 24) { bits = 24; } else { bits = 16; } erts_copy_bits(mb->base, mb->offset, 1, buf, 0, 1, bits); } Eterm erts_bs_get_utf8(ErlBinMatchBuffer* mb) { Eterm result; Uint remaining_bits; byte* pos; byte tmp_buf[4]; Eterm a, b, c; /* * Number of trailing bytes for each value of the first byte. 
*/ static const byte erts_trailing_bytes_for_utf8[256] = { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, 9,9,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,9,9,9,9,9,9,9,9 }; CHECK_MATCH_BUFFER(mb); if ((remaining_bits = mb->size - mb->offset) < 8) { return THE_NON_VALUE; } if (BIT_OFFSET(mb->offset) == 0) { pos = mb->base + BYTE_OFFSET(mb->offset); } else { erts_align_utf8_bytes(mb, tmp_buf); pos = tmp_buf; } result = pos[0]; switch (erts_trailing_bytes_for_utf8[result]) { case 0: /* One byte only */ mb->offset += 8; break; case 1: /* Two bytes */ if (remaining_bits < 16) { return THE_NON_VALUE; } a = pos[1]; if ((a & 0xC0) != 0x80) { return THE_NON_VALUE; } result = (result << 6) + a - (Eterm) 0x00003080UL; mb->offset += 16; break; case 2: /* Three bytes */ if (remaining_bits < 24) { return THE_NON_VALUE; } a = pos[1]; b = pos[2]; if ((a & 0xC0) != 0x80 || (b & 0xC0) != 0x80 || (result == 0xE0 && a < 0xA0)) { return THE_NON_VALUE; } result = (((result << 6) + a) << 6) + b - (Eterm) 0x000E2080UL; if (0xD800 <= result && result <= 0xDFFF) { return THE_NON_VALUE; } mb->offset += 24; break; case 3: /* Four bytes */ if (remaining_bits < 32) { return THE_NON_VALUE; } a = pos[1]; b = pos[2]; c = pos[3]; if ((a & 0xC0) != 0x80 || (b & 0xC0) != 0x80 || (c & 0xC0) != 0x80 || (result == 0xF0 && a < 0x90)) { return THE_NON_VALUE; } result = (((((result << 6) + a) << 6) + b) << 6) + c - (Eterm) 0x03C82080UL; if (result > 0x10FFFF) { return THE_NON_VALUE; } mb->offset += 32; break; default: return THE_NON_VALUE; } return make_small(result); } Eterm 
erts_bs_get_utf16(ErlBinMatchBuffer* mb, Uint flags) { Uint bit_offset; Uint num_bits = mb->size - mb->offset; byte* src; byte tmp_buf[4]; Uint16 w1; Uint16 w2; if (num_bits < 16) { return THE_NON_VALUE; } CHECK_MATCH_BUFFER(mb); /* * Set up the pointer to the source bytes. */ if ((bit_offset = BIT_OFFSET(mb->offset)) == 0) { /* We can access the binary directly because the bytes are aligned. */ src = mb->base + BYTE_OFFSET(mb->offset); } else { /* * We must copy the data to a temporary buffer. If possible, * get 4 bytes, otherwise two bytes. */ Uint n = num_bits < 32 ? 16 : 32; erts_copy_bits(mb->base, mb->offset, 1, tmp_buf, 0, 1, n); src = tmp_buf; } /* * Get the first (and maybe only) 16-bit word. See if we are done. */ if (flags & BSF_LITTLE) { w1 = src[0] | (src[1] << 8); } else { w1 = (src[0] << 8) | src[1]; } if (w1 < 0xD800 || w1 > 0xDFFF) { mb->offset += 16; return make_small(w1); } else if (w1 > 0xDBFF) { return THE_NON_VALUE; } /* * Get the second 16-bit word and combine it with the first. 
*/ if (num_bits < 32) { return THE_NON_VALUE; } else if (flags & BSF_LITTLE) { w2 = src[2] | (src[3] << 8); } else { w2 = (src[2] << 8) | src[3]; } if (!(0xDC00 <= w2 && w2 <= 0xDFFF)) { return THE_NON_VALUE; } mb->offset += 32; return make_small((((w1 & 0x3FF) << 10) | (w2 & 0x3FF)) + 0x10000UL); } static byte get_bit(byte b, size_t offs) { return (b >> (7-offs)) & 1; } int erts_cmp_bits(byte* a_ptr, size_t a_offs, byte* b_ptr, size_t b_offs, size_t size) { byte a; byte b; byte a_bit; byte b_bit; Uint lshift; Uint rshift; int cmp; ASSERT(a_offs < 8 && b_offs < 8); if (size == 0) return 0; if (((a_offs | b_offs | size) & 7) == 0) { int byte_size = size >> 3; return sys_memcmp(a_ptr, b_ptr, byte_size); } /* Compare bit by bit until a_ptr is aligned on byte boundary */ a = *a_ptr++; b = *b_ptr++; if (a_offs) { for (;;) { a_bit = get_bit(a, a_offs); b_bit = get_bit(b, b_offs); if ((cmp = (a_bit-b_bit)) != 0) { return cmp; } if (--size == 0) return 0; b_offs++; if (b_offs == 8) { b_offs = 0; b = *b_ptr++; } a_offs++; if (a_offs == 8) { a_offs = 0; a = *a_ptr++; break; } } } /* Compare byte by byte as long as at least 8 bits remain */ if (size >= 8) { lshift = b_offs; rshift = 8 - lshift; for (;;) { byte b_cmp = (b << lshift); b = *b_ptr++; b_cmp |= b >> rshift; if ((cmp = (a - b_cmp)) != 0) { return cmp; } size -= 8; if (size < 8) break; a = *a_ptr++; } if (size == 0) return 0; a = *a_ptr++; } /* Compare the remaining bits bit by bit */ if (size > 0) { for (;;) { a_bit = get_bit(a, a_offs); b_bit = get_bit(b, b_offs); if ((cmp = (a_bit-b_bit)) != 0) { return cmp; } if (--size == 0) return 0; a_offs++; ASSERT(a_offs < 8); b_offs++; if (b_offs == 8) { b_offs = 0; b = *b_ptr++; } } } return 0; } /* * The basic bit copy operation. Copies n bits from the source buffer to * the destination buffer. Depending on the directions, it can reverse the * copied bits. */ void erts_copy_bits(byte* src, /* Base pointer to source. 
*/ size_t soffs, /* Bit offset for source relative to src. */ int sdir, /* Direction: 1 (forward) or -1 (backward). */ byte* dst, /* Base pointer to destination. */ size_t doffs, /* Bit offset for destination relative to dst. */ int ddir, /* Direction: 1 (forward) or -1 (backward). */ size_t n) /* Number of bits to copy. */ { Uint lmask; Uint rmask; Uint count; Uint deoffs; if (n == 0) { return; } src += sdir*BYTE_OFFSET(soffs); dst += ddir*BYTE_OFFSET(doffs); soffs = BIT_OFFSET(soffs); doffs = BIT_OFFSET(doffs); deoffs = BIT_OFFSET(doffs+n); lmask = (doffs) ? MAKE_MASK(8-doffs) : 0; rmask = (deoffs) ? (MAKE_MASK(deoffs)<<(8-deoffs)) : 0; /* * Take care of the case that all bits are in the same byte. */ if (doffs+n < 8) { /* All bits are in the same byte */ lmask = (lmask & rmask) ? (lmask & rmask) : (lmask | rmask); if (soffs == doffs) { *dst = MASK_BITS(*src,*dst,lmask); } else if (soffs > doffs) { Uint bits = (*src << (soffs-doffs)); if (soffs+n > 8) { src += sdir; bits |= (*src >> (8-(soffs-doffs))); } *dst = MASK_BITS(bits,*dst,lmask); } else { *dst = MASK_BITS((*src >> (doffs-soffs)),*dst,lmask); } return; /* We are done! */ } /* * At this point, we know that the bits are in 2 or more bytes. */ count = ((lmask) ? (n - (8 - doffs)) : n) >> 3; if (soffs == doffs) { /* * The bits are aligned in the same way. We can just copy the bytes * (except for the first and last bytes). Note that the directions * might be different, so we can't just use memcpy(). */ if (lmask) { *dst = MASK_BITS(*src, *dst, lmask); dst += ddir; src += sdir; } while (count--) { *dst = *src; dst += ddir; src += sdir; } if (rmask) { *dst = MASK_BITS(*src,*dst,rmask); } } else { Uint bits; Uint bits1; Uint rshift; Uint lshift; /* * The tricky case. The bits must be shifted into position. 
*/ if (soffs > doffs) { lshift = (soffs - doffs); rshift = 8 - lshift; bits = *src; if (soffs + n > 8) { src += sdir; } } else { rshift = (doffs - soffs); lshift = 8 - rshift; bits = 0; } if (lmask) { bits1 = bits << lshift; bits = *src; src += sdir; bits1 |= (bits >> rshift); *dst = MASK_BITS(bits1,*dst,lmask); dst += ddir; } while (count--) { bits1 = bits << lshift; bits = *src; src += sdir; *dst = bits1 | (bits >> rshift); dst += ddir; } if (rmask) { bits1 = bits << lshift; if ((rmask << rshift) & 0xff) { bits = *src; bits1 |= (bits >> rshift); } *dst = MASK_BITS(bits1,*dst,rmask); } } } Eterm erts_extract_sub_binary(Eterm **hp, Eterm base_bin, byte *base_data, Uint bit_offset, Uint bit_size) { Uint byte_offset, byte_size; ERTS_CT_ASSERT(ERL_SUB_BIN_SIZE <= ERL_ONHEAP_BIN_LIMIT); byte_offset = BYTE_OFFSET(bit_offset); byte_size = BYTE_OFFSET(bit_size); if (BIT_OFFSET(bit_size) == 0 && byte_size <= ERL_ONHEAP_BIN_LIMIT) { ErlHeapBin *hb = (ErlHeapBin*)*hp; *hp += heap_bin_size(byte_size); hb->thing_word = header_heap_bin(byte_size); hb->size = byte_size; copy_binary_to_buffer(hb->data, 0, base_data, bit_offset, bit_size); return make_binary(hb); } else { ErlSubBin *sb = (ErlSubBin*)*hp; *hp += ERL_SUB_BIN_SIZE; sb->thing_word = HEADER_SUB_BIN; sb->size = byte_size; sb->offs = byte_offset; sb->orig = base_bin; sb->bitoffs = BIT_OFFSET(bit_offset); sb->bitsize = BIT_OFFSET(bit_size); sb->is_writable = 0; return make_binary(sb); } }