diff options
Diffstat (limited to 'src/include/intpack.i')
-rw-r--r-- | src/include/intpack.i | 361 |
1 files changed, 361 insertions, 0 deletions
diff --git a/src/include/intpack.i b/src/include/intpack.i new file mode 100644 index 00000000000..dc67b1f745d --- /dev/null +++ b/src/include/intpack.i @@ -0,0 +1,361 @@ +/*- + * Copyright (c) 2008-2012 WiredTiger, Inc. + * All rights reserved. + * + * See the file LICENSE for redistribution information. + */ + +/* + * Variable-length integer encoding. + * We need up to 64 bits, signed and unsigned. Further, we want the packed + * representation to have the same lexicographic ordering as the integer + * values. This avoids the need for special-purpose comparison code. + * + * Try hard to keep small values small (up to ~2 bytes): that gives the biggest + * benefit for common cases storing small values. After that, just encode the + * length in the first byte: we could squeeze in a couple of extra bits, but + * the marginal benefit is small, and we want this code to be relatively + * easy to implement in client code or scripting APIs. + * + * First byte | Next | | + * byte | bytes| Min Value | Max Value + * ------------+------+------------------------+-------------------------------- + * [00 00xxxx] | free | N/A | N/A + * [00 01llll] | llll | -2^64 | -2^13 - 2^6 + * [00 1xxxxx] | 1 | -2^13 - 2^6 | -2^6 - 1 + * [01 xxxxxx] | 0 | -2^6 | -1 + * [10 xxxxxx] | 0 | 0 | 2^6 - 1 + * [11 0xxxxx] | 1 | 2^6 | 2^13 + 2^6 - 1 + * [11 10llll] | llll | 2^13 + 2^6 | 2^64 - 1 + * [11 11xxxx] | free | N/A | N/A + */ + +#define NEG_MULTI_MARKER (uint8_t)0x10 +#define NEG_2BYTE_MARKER (uint8_t)0x20 +#define NEG_1BYTE_MARKER (uint8_t)0x40 +#define POS_1BYTE_MARKER (uint8_t)0x80 +#define POS_2BYTE_MARKER (uint8_t)0xc0 +#define POS_MULTI_MARKER (uint8_t)0xe0 + +#define NEG_1BYTE_MIN ((-1) << 6) +#define NEG_2BYTE_MIN (((-1) << 13) + NEG_1BYTE_MIN) +#define POS_1BYTE_MAX ((1 << 6) - 1) +#define POS_2BYTE_MAX ((1 << 13) + POS_1BYTE_MAX) + +/* Extract bits <start> to <end> from a value (counting from LSB == 0). */ +#define GET_BITS(x, start, end) (((x) & ((1 << (start)) - 1)) >> (end)) + +#define WT_SIZE_CHECK(l, maxl) \ + WT_RET_TEST((maxl) != 0 && (size_t)(l) > (maxl), ENOMEM) + +/* Count the leading zero bytes. */ +#ifdef __GNUC__ +#define WT_LEADING_ZEROS(x, i) \ + (i = (x == 0) ? (int)sizeof (x) : __builtin_clzll(x) >> 3) +#else +#define WT_LEADING_ZEROS(x, i) do { \ + uint64_t __x = (x); \ + uint64_t __m = 0xff << 56; \ + for (i = 0; !(__x & __m) && i != 8; i++) \ + __m >>= 8; \ +} while (0) +#endif + +/* + * __wt_vpack_posint -- + * Packs a positive variable-length integer in the specified location. + */ +static inline int +__wt_vpack_posint(uint8_t **pp, size_t maxlen, uint64_t x) +{ + uint8_t *p; + int len, lz, shift; + + WT_LEADING_ZEROS(x, lz); + len = (int)sizeof (x) - lz; + WT_SIZE_CHECK(len + 1, maxlen); + p = *pp; + + /* There are four bits we can use in the first byte. */ + *p++ |= (len & 0xf); + + for (shift = (len - 1) << 3; len != 0; --len, shift -= 8) + *p++ = (x >> shift); + + *pp = p; + return (0); +} + +/* + * __wt_vpack_negint -- + * Packs a negative variable-length integer in the specified location. + */ +static inline int +__wt_vpack_negint(uint8_t **pp, size_t maxlen, uint64_t x) +{ + uint8_t *p; + int len, lz, shift; + + WT_LEADING_ZEROS(~x, lz); + len = (int)sizeof (x) - lz; + WT_SIZE_CHECK(len + 1, maxlen); + p = *pp; + + /* + * There are four size bits we can use in the first byte. + * For negative numbers, we store the number of leading 0xff bytes + * to maintain ordering (if this is not obvious, it may help to + * remember that -1 is the largest negative number). + */ + *p++ |= (lz & 0xf); + + for (shift = (len - 1) << 3; len != 0; shift -= 8, --len) + *p++ = (x >> shift); + + *pp = p; + return (0); +} + +/* + * __wt_vunpack_posint -- + * Reads a variable-length positive integer from the specified location. + */ +static inline int +__wt_vunpack_posint(const uint8_t **pp, size_t maxlen, uint64_t *retp) +{ + uint64_t x; + const uint8_t *p; + uint8_t len; + + /* There are four length bits in the first byte. */ + p = *pp; + len = (*p++ & 0xf); + WT_SIZE_CHECK(len + 1, maxlen); + + for (x = 0; len != 0; --len) + x = (x << 8) | *p++; + + *retp = x; + *pp = p; + return (0); +} + +/* + * __wt_vunpack_negint -- + * Reads a variable-length negative integer from the specified location. + */ +static inline int +__wt_vunpack_negint(const uint8_t **pp, size_t maxlen, uint64_t *retp) +{ + uint64_t x; + const uint8_t *p; + uint8_t len; + + /* There are four length bits in the first byte. */ + p = *pp; + len = (int)sizeof (x) - (*p++ & 0xf); + WT_SIZE_CHECK(len + 1, maxlen); + + for (x = UINT64_MAX; len != 0; --len) + x = (x << 8) | *p++; + + *retp = x; + *pp = p; + return (0); +} + +/* + * __wt_vpack_uint + * Variable-sized packing for unsigned integers + */ +static inline int +__wt_vpack_uint(uint8_t **pp, size_t maxlen, uint64_t x) +{ + uint8_t *p; + + WT_SIZE_CHECK(1, maxlen); + p = *pp; + if (x <= POS_1BYTE_MAX) + *p++ = POS_1BYTE_MARKER | GET_BITS(x, 6, 0); + else if (x <= POS_2BYTE_MAX) { + WT_SIZE_CHECK(2, maxlen); + x -= POS_1BYTE_MAX + 1; + *p++ = POS_2BYTE_MARKER | GET_BITS(x, 13, 8); + *p++ = GET_BITS(x, 8, 0); + } else { + x -= POS_2BYTE_MAX + 1; + *p = POS_MULTI_MARKER; + return (__wt_vpack_posint(pp, maxlen, x)); + } + + *pp = p; + return (0); +} + +/* + * __wt_vpack_int + * Variable-sized packing for signed integers + */ +static inline int +__wt_vpack_int(uint8_t **pp, size_t maxlen, int64_t x) +{ + uint8_t *p; + + WT_SIZE_CHECK(1, maxlen); + p = *pp; + if (x < NEG_2BYTE_MIN) { + *p = NEG_MULTI_MARKER; + return (__wt_vpack_negint(pp, maxlen, (uint64_t)x)); + } else if (x < NEG_1BYTE_MIN) { + WT_SIZE_CHECK(2, maxlen); + x -= NEG_2BYTE_MIN; + *p++ = NEG_2BYTE_MARKER | GET_BITS(x, 13, 8); + *p++ = GET_BITS(x, 8, 0); + } else if (x < 0) { + x -= NEG_1BYTE_MIN; + *p++ = NEG_1BYTE_MARKER | GET_BITS(x, 6, 0); + } else + /* For non-negative values, use the unsigned code above. */ + return (__wt_vpack_uint(pp, maxlen, (uint64_t)x)); + + *pp = p; + return (0); +} + +/* + * __wt_vunpack_uint + * Variable-sized unpacking for unsigned integers + */ +static inline int +__wt_vunpack_uint(const uint8_t **pp, size_t maxlen, uint64_t *xp) +{ + const uint8_t *p; + + WT_SIZE_CHECK(1, maxlen); + p = *pp; + switch (*p & 0xf0) { + case POS_1BYTE_MARKER: + case POS_1BYTE_MARKER | 0x10: + case POS_1BYTE_MARKER | 0x20: + case POS_1BYTE_MARKER | 0x30: + *xp = GET_BITS(*p, 6, 0); + p += 1; + break; + case POS_2BYTE_MARKER: + case POS_2BYTE_MARKER | 0x10: + WT_SIZE_CHECK(2, maxlen); + *xp = GET_BITS(*p++, 5, 0) << 8; + *xp |= *p++; + *xp += POS_1BYTE_MAX + 1; + break; + case POS_MULTI_MARKER: + WT_RET(__wt_vunpack_posint(pp, maxlen, xp)); + *xp += POS_2BYTE_MAX + 1; + return (0); + default: + return (EINVAL); + } + + *pp = p; + return (0); +} + +/* + * __wt_vunpack_int + * Variable-sized packing for signed integers + */ +static inline int +__wt_vunpack_int(const uint8_t **pp, size_t maxlen, int64_t *xp) +{ + const uint8_t *p; + + WT_SIZE_CHECK(1, maxlen); + p = *pp; + switch (*p & 0xf0) { + case NEG_MULTI_MARKER: + WT_RET(__wt_vunpack_negint(pp, maxlen, (uint64_t *)xp)); + return (0); + case NEG_2BYTE_MARKER: + case NEG_2BYTE_MARKER | 0x10: + WT_SIZE_CHECK(2, maxlen); + *xp = GET_BITS(*p++, 5, 0) << 8; + *xp |= *p++; + *xp += NEG_2BYTE_MIN; + p += 2; + break; + case NEG_1BYTE_MARKER: + case NEG_1BYTE_MARKER | 0x10: + case NEG_1BYTE_MARKER | 0x20: + case NEG_1BYTE_MARKER | 0x30: + *xp = NEG_1BYTE_MIN + GET_BITS(*p, 6, 0); + p += 1; + break; + default: + /* Identical to the unsigned case. */ + return (__wt_vunpack_uint(pp, maxlen, (uint64_t *)xp)); + } + + *pp = p; + return (0); +} + +/* + * __wt_vsize_posint -- + * Return the packed size of a positive variable-length integer. + */ +static inline size_t +__wt_vsize_posint(uint64_t x) +{ + int lz; + + WT_LEADING_ZEROS(x, lz); + return (WT_INTPACK64_MAXSIZE - lz); +} + +/* + * __wt_vsize_negint -- + * Return the packed size of a negative variable-length integer. + */ +static inline size_t +__wt_vsize_negint(uint64_t x) +{ + int lz; + + WT_LEADING_ZEROS(~x, lz); + return (size_t)(WT_INTPACK64_MAXSIZE - lz); +} + +/* + * __wt_vsize_uint + * Return the packed size of an unsigned integer. + */ +static inline size_t +__wt_vsize_uint(uint64_t x) +{ + if (x <= POS_1BYTE_MAX) + return (1); + else if (x <= POS_2BYTE_MAX) { + return (2); + } else { + x -= POS_2BYTE_MAX + 1; + return (__wt_vsize_posint(x)); + } +} + +/* + * __wt_vsize_int + * Return the packed size of a signed integer. + */ +static inline size_t +__wt_vsize_int(int64_t x) +{ + if (x < NEG_2BYTE_MIN) { + return (__wt_vsize_negint((uint64_t)x)); + } else if (x < NEG_1BYTE_MIN) { + return (2); + } else if (x < 0) { + return (1); + } else + /* For non-negative values, use the unsigned code above. */ + return (__wt_vsize_uint((uint64_t)x)); +} |