diff options
author | Thirunarayanan Balathandayuthapani <thiru@mariadb.com> | 2021-10-06 18:50:56 +0530 |
---|---|---|
committer | Marko Mäkelä <marko.makela@mariadb.com> | 2021-10-21 12:56:59 +0300 |
commit | 8ce8c269f461b7a578cf0bb5cce18f1d83ebeb9b (patch) | |
tree | 3a36a4ca48bcea74ccfbc328ecff5e9647424d71 /storage/innobase | |
parent | 6b4fad94029bb73fbb3f9d05f2dfed09e83ec31c (diff) | |
download | mariadb-git-8ce8c269f461b7a578cf0bb5cce18f1d83ebeb9b.tar.gz |
MDEV-19522 InnoDB commit fails when FTS_DOC_ID value is greater than 4294967295
InnoDB commit fails when consecutive FTS_DOC_ID value
is greater than 4294967295.
Fix is that InnoDB should remove the delta FTS_DOC_ID
value limitations and fts should encode 8 byte value,
remove FTS_DOC_ID_MAX_STEP variable. Replaced the
fts0vlc.ic file with fts0vlc.h
fts_encode_int(): Should be able to encode 10 bytes value
fts_get_encoded_len(): Should get the length of the value
which has 10 bytes
fts_decode_vlc(): Add debug assertion to verify the maximum
length allowed is 10.
mach_read_uint64_little_endian(): Reads 64 bit stored in
little endian format
Added a unit test case which check for minimum and maximum
value to do the fts encoding
Diffstat (limited to 'storage/innobase')
-rw-r--r-- | storage/innobase/CMakeLists.txt | 4 | ||||
-rw-r--r-- | storage/innobase/fts/fts0fts.cc | 10 | ||||
-rw-r--r-- | storage/innobase/fts/fts0opt.cc | 10 | ||||
-rw-r--r-- | storage/innobase/fts/fts0que.cc | 7 | ||||
-rw-r--r-- | storage/innobase/handler/ha_innodb.cc | 13 | ||||
-rw-r--r-- | storage/innobase/handler/i_s.cc | 18 | ||||
-rw-r--r-- | storage/innobase/include/fts0fts.h | 4 | ||||
-rw-r--r-- | storage/innobase/include/fts0types.h | 33 | ||||
-rw-r--r-- | storage/innobase/include/fts0vlc.h | 124 | ||||
-rw-r--r-- | storage/innobase/include/fts0vlc.ic | 142 | ||||
-rw-r--r-- | storage/innobase/include/mach0data.h | 22 | ||||
-rw-r--r-- | storage/innobase/row/row0mysql.cc | 17 | ||||
-rw-r--r-- | storage/innobase/unittest/CMakeLists.txt | 22 | ||||
-rw-r--r-- | storage/innobase/unittest/innodb_fts-t.cc | 52 |
14 files changed, 247 insertions, 231 deletions
diff --git a/storage/innobase/CMakeLists.txt b/storage/innobase/CMakeLists.txt index b965c4fbb45..ff646e00f39 100644 --- a/storage/innobase/CMakeLists.txt +++ b/storage/innobase/CMakeLists.txt @@ -188,3 +188,7 @@ IF(MSVC) ENDIF() ADD_SUBDIRECTORY(${CMAKE_SOURCE_DIR}/extra/mariabackup ${CMAKE_BINARY_DIR}/extra/mariabackup) + +IF(WITH_UNIT_TESTS) + ADD_SUBDIRECTORY(unittest) +ENDIF() diff --git a/storage/innobase/fts/fts0fts.cc b/storage/innobase/fts/fts0fts.cc index 07077006096..bbe53f4d163 100644 --- a/storage/innobase/fts/fts0fts.cc +++ b/storage/innobase/fts/fts0fts.cc @@ -32,7 +32,7 @@ Full Text Search interface #include "fts0priv.h" #include "fts0types.h" #include "fts0types.ic" -#include "fts0vlc.ic" +#include "fts0vlc.h" #include "fts0plugin.h" #include "dict0priv.h" #include "dict0stats.h" @@ -1247,7 +1247,7 @@ fts_cache_node_add_positions( ulint enc_len; ulint last_pos; byte* ptr_start; - ulint doc_id_delta; + doc_id_t doc_id_delta; #ifdef UNIV_DEBUG if (cache) { @@ -1258,7 +1258,7 @@ fts_cache_node_add_positions( ut_ad(doc_id >= node->last_doc_id); /* Calculate the space required to store the ilist. */ - doc_id_delta = (ulint)(doc_id - node->last_doc_id); + doc_id_delta = doc_id - node->last_doc_id; enc_len = fts_get_encoded_len(doc_id_delta); last_pos = 0; @@ -1307,14 +1307,14 @@ fts_cache_node_add_positions( ptr_start = ptr; /* Encode the new fragment. */ - ptr += fts_encode_int(doc_id_delta, ptr); + ptr = fts_encode_int(doc_id_delta, ptr); last_pos = 0; for (i = 0; i < ib_vector_size(positions); i++) { ulint pos = *(static_cast<ulint*>( ib_vector_get(positions, i))); - ptr += fts_encode_int(pos - last_pos, ptr); + ptr = fts_encode_int(pos - last_pos, ptr); last_pos = pos; } diff --git a/storage/innobase/fts/fts0opt.cc b/storage/innobase/fts/fts0opt.cc index c4cbbfafff4..2d506a757a0 100644 --- a/storage/innobase/fts/fts0opt.cc +++ b/storage/innobase/fts/fts0opt.cc @@ -36,6 +36,7 @@ Completed 2011/7/10 Sunny and Jimmy Yang #include "ut0list.h" #include "zlib.h" #include "fts0opt.h" +#include "fts0vlc.h" /** The FTS optimize thread's work queue. */ ib_wqueue_t* fts_optimize_wq; @@ -1116,7 +1117,7 @@ fts_optimize_encode_node( ulint pos_enc_len; doc_id_t doc_id_delta; dberr_t error = DB_SUCCESS; - byte* src = enc->src_ilist_ptr; + const byte* src = enc->src_ilist_ptr; if (node->first_doc_id == 0) { ut_a(node->last_doc_id == 0); @@ -1173,7 +1174,7 @@ fts_optimize_encode_node( /* Encode the doc id. Cast to ulint, the delta should be small and therefore no loss of precision. */ - dst += fts_encode_int((ulint) doc_id_delta, dst); + dst = fts_encode_int(doc_id_delta, dst); /* Copy the encoded pos array. */ memcpy(dst, src, pos_enc_len); @@ -1220,7 +1221,8 @@ fts_optimize_node( doc_id_t delta; doc_id_t del_doc_id = FTS_NULL_DOC_ID; - delta = fts_decode_vlc(&enc->src_ilist_ptr); + delta = fts_decode_vlc( + (const byte**)&enc->src_ilist_ptr); test_again: /* Check whether the doc id is in the delete list, if @@ -1248,7 +1250,7 @@ test_again: /* Skip the entries for this document. */ while (*enc->src_ilist_ptr) { - fts_decode_vlc(&enc->src_ilist_ptr); + fts_decode_vlc((const byte**)&enc->src_ilist_ptr); } /* Skip the end of word position marker. */ diff --git a/storage/innobase/fts/fts0que.cc b/storage/innobase/fts/fts0que.cc index b4c72e53afe..965c79bfaa5 100644 --- a/storage/innobase/fts/fts0que.cc +++ b/storage/innobase/fts/fts0que.cc @@ -34,6 +34,7 @@ Completed 2011/7/10 Sunny and Jimmy Yang #include "fts0pars.h" #include "fts0types.h" #include "fts0plugin.h" +#include "fts0vlc.h" #include <iomanip> #include <vector> @@ -3224,7 +3225,7 @@ fts_query_filter_doc_ids( ulint len, /*!< in: doc id ilist size */ ibool calc_doc_count) /*!< in: whether to remember doc count */ { - byte* ptr = static_cast<byte*>(data); + const byte* ptr = static_cast<byte*>(data); doc_id_t doc_id = 0; ulint decoded = 0; ib_rbt_t* doc_freqs = word_freq->doc_freqs; @@ -3234,8 +3235,8 @@ fts_query_filter_doc_ids( ulint freq = 0; fts_doc_freq_t* doc_freq; fts_match_t* match = NULL; - ulint last_pos = 0; - ulint pos = fts_decode_vlc(&ptr); + doc_id_t last_pos = 0; + doc_id_t pos = fts_decode_vlc(&ptr); /* Some sanity checks. */ if (doc_id == 0) { diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc index 10e5cbe216f..91d245ad0e2 100644 --- a/storage/innobase/handler/ha_innodb.cc +++ b/storage/innobase/handler/ha_innodb.cc @@ -8543,8 +8543,7 @@ calc_row_difference( && prebuilt->table->fts && innobase_strcasecmp( field->field_name, FTS_DOC_ID_COL_NAME) == 0) { - doc_id = (doc_id_t) mach_read_from_n_little_endian( - n_ptr, 8); + doc_id = mach_read_uint64_little_endian(n_ptr); if (doc_id == 0) { return(DB_FTS_INVALID_DOCID); } @@ -8787,16 +8786,6 @@ calc_row_difference( << innodb_table->name; return(DB_FTS_INVALID_DOCID); - } else if ((doc_id - - prebuilt->table->fts->cache->next_doc_id) - >= FTS_DOC_ID_MAX_STEP) { - - ib::warn() << "Doc ID " << doc_id << " is too" - " big. Its difference with largest" - " Doc ID used " << prebuilt->table->fts - ->cache->next_doc_id - 1 - << " cannot exceed or equal to " - << FTS_DOC_ID_MAX_STEP; } diff --git a/storage/innobase/handler/i_s.cc b/storage/innobase/handler/i_s.cc index 03c0efff027..1111d974ad0 100644 --- a/storage/innobase/handler/i_s.cc +++ b/storage/innobase/handler/i_s.cc @@ -58,6 +58,7 @@ Modified Dec 29, 2014 Jan Lindström (Added sys_semaphore_waits) #include "fil0fil.h" #include "fil0crypt.h" #include "dict0crea.h" +#include "fts0vlc.h" /** The latest successfully looked up innodb_fts_aux_table */ UNIV_INTERN table_id_t innodb_ft_aux_table_id; @@ -2775,7 +2776,7 @@ i_s_fts_index_cache_fill_one_index( /* Decrypt the ilist, and display Dod ID and word position */ for (ulint i = 0; i < ib_vector_size(word->nodes); i++) { fts_node_t* node; - byte* ptr; + const byte* ptr; ulint decoded = 0; doc_id_t doc_id = 0; @@ -2785,13 +2786,11 @@ i_s_fts_index_cache_fill_one_index( ptr = node->ilist; while (decoded < node->ilist_size) { - ulint pos = fts_decode_vlc(&ptr); - doc_id += pos; + doc_id += fts_decode_vlc(&ptr); /* Get position info */ while (*ptr) { - pos = fts_decode_vlc(&ptr); OK(field_store_string( fields[I_S_FTS_WORD], @@ -2812,7 +2811,7 @@ i_s_fts_index_cache_fill_one_index( doc_id, true)); OK(fields[I_S_FTS_ILIST_DOC_POS]->store( - pos, true)); + fts_decode_vlc(&ptr), true)); OK(schema_table_store_record( thd, table)); @@ -3146,7 +3145,7 @@ i_s_fts_index_table_fill_one_fetch( /* Decrypt the ilist, and display Dod ID and word position */ for (ulint i = 0; i < ib_vector_size(word->nodes); i++) { fts_node_t* node; - byte* ptr; + const byte* ptr; ulint decoded = 0; doc_id_t doc_id = 0; @@ -3156,13 +3155,10 @@ i_s_fts_index_table_fill_one_fetch( ptr = node->ilist; while (decoded < node->ilist_size) { - ulint pos = fts_decode_vlc(&ptr); - - doc_id += pos; + doc_id += fts_decode_vlc(&ptr); /* Get position info */ while (*ptr) { - pos = fts_decode_vlc(&ptr); OK(field_store_string( fields[I_S_FTS_WORD], @@ -3181,7 +3177,7 @@ i_s_fts_index_table_fill_one_fetch( longlong(doc_id), true)); OK(fields[I_S_FTS_ILIST_DOC_POS]->store( - pos, true)); + fts_decode_vlc(&ptr), true)); OK(schema_table_store_record( thd, table)); diff --git a/storage/innobase/include/fts0fts.h b/storage/innobase/include/fts0fts.h index 84d8ccd26ef..dfac5117c17 100644 --- a/storage/innobase/include/fts0fts.h +++ b/storage/innobase/include/fts0fts.h @@ -96,10 +96,6 @@ those defined in mysql file ft_global.h */ /** Threshold where our optimize thread automatically kicks in */ #define FTS_OPTIMIZE_THRESHOLD 10000000 -/** Threshold to avoid exhausting of doc ids. Consecutive doc id difference -should not exceed FTS_DOC_ID_MAX_STEP */ -#define FTS_DOC_ID_MAX_STEP 65535 - /** Maximum possible Fulltext word length in bytes (assuming mbmaxlen=4) */ #define FTS_MAX_WORD_LEN (HA_FT_MAXCHARLEN * 4) diff --git a/storage/innobase/include/fts0types.h b/storage/innobase/include/fts0types.h index f5760a16c0e..21d32c7d313 100644 --- a/storage/innobase/include/fts0types.h +++ b/storage/innobase/include/fts0types.h @@ -315,16 +315,6 @@ int fts_doc_id_cmp( const void* p2); /*!< in: id2 */ /******************************************************************//** -Decode and return the integer that was encoded using our VLC scheme.*/ -UNIV_INLINE -ulint -fts_decode_vlc( -/*===========*/ - /*!< out: value decoded */ - byte** ptr); /*!< in: ptr to decode from, this ptr is - incremented by the number of bytes decoded */ - -/******************************************************************//** Duplicate a string. */ UNIV_INLINE void @@ -339,28 +329,6 @@ fts_string_dup( mem_heap_t* heap); /*!< in: heap to use */ /******************************************************************//** -Return length of val if it were encoded using our VLC scheme. */ -UNIV_INLINE -ulint -fts_get_encoded_len( -/*================*/ - /*!< out: length of value - encoded, in bytes */ - ulint val); /*!< in: value to encode */ - -/******************************************************************//** -Encode an integer using our VLC scheme and return the length in bytes. */ -UNIV_INLINE -ulint -fts_encode_int( -/*===========*/ - /*!< out: length of value - encoded, in bytes */ - ulint val, /*!< in: value to encode */ - byte* buf); /*!< in: buffer, must have - enough space */ - -/******************************************************************//** Get the selected FTS aux INDEX suffix. */ UNIV_INLINE const char* @@ -381,6 +349,5 @@ fts_select_index( ulint len); #include "fts0types.ic" -#include "fts0vlc.ic" #endif /* INNOBASE_FTS0TYPES_H */ diff --git a/storage/innobase/include/fts0vlc.h b/storage/innobase/include/fts0vlc.h new file mode 100644 index 00000000000..d6e6037777e --- /dev/null +++ b/storage/innobase/include/fts0vlc.h @@ -0,0 +1,124 @@ +/** + +Copyright (c) 2021, MariaDB Corporation. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA +**/ +/** +@file include/fts0vlc.h +Full text variable length integer encoding/decoding. + +Created 2021-10-19 Thirunarayanan Balathandayuthapani +**/ + +/** Return length of val if it were encoded using our VLC scheme. +@param val value to encode +@return length of value encoded, in bytes */ +inline size_t fts_get_encoded_len(doc_id_t val) +{ + if (val < static_cast<doc_id_t>(1) << 7) + return 1; + if (val < static_cast<doc_id_t>(1) << 14) + return 2; + if (val < static_cast<doc_id_t>(1) << 21) + return 3; + if (val < static_cast<doc_id_t>(1) << 28) + return 4; + if (val < static_cast<doc_id_t>(1) << 35) + return 5; + if (val < static_cast<doc_id_t>(1) << 42) + return 6; + if (val < static_cast<doc_id_t>(1) << 49) + return 7; + if (val < static_cast<doc_id_t>(1) << 56) + return 8; + if (val < static_cast<doc_id_t>(1) << 63) + return 9; + return 10; +} + +/** Encode an integer using our VLC scheme and return the +length in bytes. +@param val value to encode +@param buf buffer, must have enough space +@return length of value encoded, in bytes */ +inline byte *fts_encode_int(doc_id_t val, byte *buf) +{ + if (val < static_cast<doc_id_t>(1) << 7) + goto add_1; + if (val < static_cast<doc_id_t>(1) << 14) + goto add_2; + if (val < static_cast<doc_id_t>(1) << 21) + goto add_3; + if (val < static_cast<doc_id_t>(1) << 28) + goto add_4; + if (val < static_cast<doc_id_t>(1) << 35) + goto add_5; + if (val < static_cast<doc_id_t>(1) << 42) + goto add_6; + if (val < static_cast<doc_id_t>(1) << 49) + goto add_7; + if (val < static_cast<doc_id_t>(1) << 56) + goto add_8; + if (val < static_cast<doc_id_t>(1) << 63) + goto add_9; + + *buf++= static_cast<byte>(val >> 63); +add_9: + *buf++= static_cast<byte>(val >> 56) & 0x7F; +add_8: + *buf++= static_cast<byte>(val >> 49) & 0x7F; +add_7: + *buf++= static_cast<byte>(val >> 42) & 0x7F; +add_6: + *buf++= static_cast<byte>(val >> 35) & 0x7F; +add_5: + *buf++= static_cast<byte>(val >> 28) & 0x7F; +add_4: + *buf++= static_cast<byte>(val >> 21) & 0x7F; +add_3: + *buf++= static_cast<byte>(val >> 14) & 0x7F; +add_2: + *buf++= static_cast<byte>(val >> 7) & 0x7F; +add_1: + *buf++= static_cast<byte>(val) | 0x80; + return buf; +} + +/** Decode and return the integer that was encoded using +our VLC scheme. +@param ptr pointer to decode from, this ptr is + incremented by the number of bytes decoded +@return value decoded */ +inline doc_id_t fts_decode_vlc(const byte **ptr) +{ + ut_d(const byte *const start= *ptr); + ut_ad(*start); + + doc_id_t val= 0; + for (;;) + { + byte b= *(*ptr)++; + val|= (b & 0x7F); + + /* High-bit on means "last byte in the encoded integer". */ + if (b & 0x80) + break; + ut_ad(val < static_cast<doc_id_t>(1) << (64 - 7)); + val <<= 7; + } + + ut_ad(*ptr - start <= 10); + + return(val); +} diff --git a/storage/innobase/include/fts0vlc.ic b/storage/innobase/include/fts0vlc.ic deleted file mode 100644 index 75d8535057e..00000000000 --- a/storage/innobase/include/fts0vlc.ic +++ /dev/null @@ -1,142 +0,0 @@ -/***************************************************************************** - -Copyright (c) 2007, 2011, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/******************************************************************//** -@file include/fts0vlc.ic -Full text variable length integer encoding/decoding. - -Created 2007-03-27 Sunny Bains -*******************************************************/ - -#ifndef INNOBASE_FTS0VLC_IC -#define INNOBASE_FTS0VLC_IC - -#include "fts0types.h" - -/******************************************************************//** -Return length of val if it were encoded using our VLC scheme. -FIXME: We will need to be able encode 8 bytes value -@return length of value encoded, in bytes */ -UNIV_INLINE -ulint -fts_get_encoded_len( -/*================*/ - ulint val) /* in: value to encode */ -{ - if (val <= 127) { - return(1); - } else if (val <= 16383) { - return(2); - } else if (val <= 2097151) { - return(3); - } else if (val <= 268435455) { - return(4); - } else { - /* Possibly we should care that on 64-bit machines ulint can - contain values that we can't encode in 5 bytes, but - fts_encode_int doesn't handle them either so it doesn't much - matter. */ - - return(5); - } -} - -/******************************************************************//** -Encode an integer using our VLC scheme and return the length in bytes. -@return length of value encoded, in bytes */ -UNIV_INLINE -ulint -fts_encode_int( -/*===========*/ - ulint val, /* in: value to encode */ - byte* buf) /* in: buffer, must have enough space */ -{ - ulint len; - - if (val <= 127) { - *buf = (byte) val; - - len = 1; - } else if (val <= 16383) { - *buf++ = (byte)(val >> 7); - *buf = (byte)(val & 0x7F); - - len = 2; - } else if (val <= 2097151) { - *buf++ = (byte)(val >> 14); - *buf++ = (byte)((val >> 7) & 0x7F); - *buf = (byte)(val & 0x7F); - - len = 3; - } else if (val <= 268435455) { - *buf++ = (byte)(val >> 21); - *buf++ = (byte)((val >> 14) & 0x7F); - *buf++ = (byte)((val >> 7) & 0x7F); - *buf = (byte)(val & 0x7F); - - len = 4; - } else { - /* Best to keep the limitations of the 32/64 bit versions - identical, at least for the time being. */ - ut_ad(val <= 4294967295u); - - *buf++ = (byte)(val >> 28); - *buf++ = (byte)((val >> 21) & 0x7F); - *buf++ = (byte)((val >> 14) & 0x7F); - *buf++ = (byte)((val >> 7) & 0x7F); - *buf = (byte)(val & 0x7F); - - len = 5; - } - - /* High-bit on means "last byte in the encoded integer". */ - *buf |= 0x80; - - return(len); -} - -/******************************************************************//** -Decode and return the integer that was encoded using our VLC scheme. -@return value decoded */ -UNIV_INLINE -ulint -fts_decode_vlc( -/*===========*/ - byte** ptr) /* in: ptr to decode from, this ptr is - incremented by the number of bytes decoded */ -{ - ulint val = 0; - - for (;;) { - byte b = **ptr; - - ++*ptr; - val |= (b & 0x7F); - - /* High-bit on means "last byte in the encoded integer". */ - if (b & 0x80) { - break; - } else { - val <<= 7; - } - } - - return(val); -} - -#endif diff --git a/storage/innobase/include/mach0data.h b/storage/innobase/include/mach0data.h index 8141c8a91e0..860ef20b8bd 100644 --- a/storage/innobase/include/mach0data.h +++ b/storage/innobase/include/mach0data.h @@ -316,6 +316,28 @@ mach_read_from_n_little_endian( const byte* buf, /*!< in: from where to read */ ulint buf_size) /*!< in: from how many bytes to read */ MY_ATTRIBUTE((warn_unused_result)); + + +/** Reads a 64 bit stored in big endian format +@param buf From where to read +@return uint64_t */ +UNIV_INLINE +uint64_t +mach_read_uint64_little_endian(const byte* buf) +{ +#ifdef WORDS_BIGENDIAN + return + uint64_t(buf[0]) | uint64_t(buf[1]) << 8 | + uint64_t(buf[2]) << 16 | uint64_t(buf[3]) << 24 | + uint64_t(buf[4]) << 32 | uint64_t(buf[5]) << 40 | + uint64_t(buf[6]) << 48 | uint64_t(buf[7]) << 56; +#else + uint64_t n; + memcpy(&n, buf, sizeof(uint64_t)); + return n; +#endif +} + /*********************************************************//** Writes a ulint in the little-endian format. */ UNIV_INLINE diff --git a/storage/innobase/row/row0mysql.cc b/storage/innobase/row/row0mysql.cc index c2f9186d408..6445f67f3c2 100644 --- a/storage/innobase/row/row0mysql.cc +++ b/storage/innobase/row/row0mysql.cc @@ -1465,23 +1465,6 @@ error_exit: trx->error_state = DB_FTS_INVALID_DOCID; goto error_exit; } - - /* Difference between Doc IDs are restricted within - 4 bytes integer. See fts_get_encoded_len(). Consecutive - doc_ids difference should not exceed - FTS_DOC_ID_MAX_STEP value. */ - - if (doc_id - next_doc_id >= FTS_DOC_ID_MAX_STEP) { - ib::error() << "Doc ID " << doc_id - << " is too big. Its difference with" - " largest used Doc ID " - << next_doc_id - 1 << " cannot" - " exceed or equal to " - << FTS_DOC_ID_MAX_STEP; - err = DB_FTS_INVALID_DOCID; - trx->error_state = DB_FTS_INVALID_DOCID; - goto error_exit; - } } if (table->skip_alter_undo) { diff --git a/storage/innobase/unittest/CMakeLists.txt b/storage/innobase/unittest/CMakeLists.txt new file mode 100644 index 00000000000..df98cddf73e --- /dev/null +++ b/storage/innobase/unittest/CMakeLists.txt @@ -0,0 +1,22 @@ +# Copyright (c) 2021, MariaDB Corporation. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; version 2 of the License. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA + +INCLUDE_DIRECTORIES(${CMAKE_SOURCE_DIR}/include + ${CMAKE_SOURCE_DIR}/unittest/mytap + ${CMAKE_SOURCE_DIR}/storage/innobase/include) +ADD_EXECUTABLE(innodb_fts-t innodb_fts-t.cc) +TARGET_LINK_LIBRARIES(innodb_fts-t mysys mytap) +ADD_DEPENDENCIES(innodb_fts-t GenError) +MY_ADD_TEST(innodb_fts) diff --git a/storage/innobase/unittest/innodb_fts-t.cc b/storage/innobase/unittest/innodb_fts-t.cc new file mode 100644 index 00000000000..57585e337c2 --- /dev/null +++ b/storage/innobase/unittest/innodb_fts-t.cc @@ -0,0 +1,52 @@ +#include "tap.h" +#include "fts0fts.h" +#include "fts0vlc.h" + +struct fts_encode_info +{ + const byte buf[10]; + int32_t len; + doc_id_t val; +}; + +/* Contains fts encoding min & max value for each length bytes */ +static const fts_encode_info fts_info[]= +{ + {{0x80}, 1, 0}, + {{0xFF}, 1, (1 << 7) - 1}, + {{0x01, 0x80}, 2, 1 << 7}, + {{0x7F, 0XFF}, 2, (1 << 14) - 1}, + {{0x01, 0x00, 0x80}, 3, 1 << 14}, + {{0x7F, 0X7F, 0XFF}, 3, (1 << 21) - 1}, + {{0x01, 0x00, 0x00, 0x80}, 4, 1 << 21}, + {{0x7F, 0X7F, 0X7F, 0xFF}, 4, (1 << 28) - 1}, + {{0x01, 0x00, 0x00, 0x00, 0x80}, 5, 1 << 28}, + {{0x7F, 0X7F, 0X7F, 0x7F, 0xFF}, 5, (1ULL << 35) - 1}, + {{0x01, 0x00, 0x00, 0x00, 0x00, 0x80}, 6, 1ULL << 35}, + {{0x7F, 0X7F, 0X7F, 0x7F, 0x7F, 0xFF}, 6, (1ULL << 42) - 1}, + {{0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80}, 7, 1ULL << 42}, + {{0x7F, 0X7F, 0X7F, 0x7F, 0x7F, 0x7F, 0XFF}, 7, (1ULL << 49) - 1}, + {{0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80}, 8, 1ULL << 49}, + {{0x7F, 0X7F, 0X7F, 0x7F, 0x7F, 0x7F, 0X7F, 0XFF}, 8, (1ULL << 56) -1}, + {{0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80}, 9, 1ULL << 56}, + {{0x7F, 0X7F, 0X7F, 0x7F, 0x7F, 0x7F, 0X7F, 0x7F, 0XFF}, 9, (1ULL << 63) -1}, + {{0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80}, 10, 1ULL << 63}, + {{0x01, 0X7F, 0X7F, 0x7F, 0x7F, 0x7F, 0X7F, 0x7F, 0x7F, 0xFF}, 10, ~0ULL} +}; + +int main(int, char**) +{ + for (int i= array_elements(fts_info); i--;) + { + byte buf[10]; + const byte* fts_buf= buf; + int32_t len= fts_encode_int(fts_info[i].val, buf) - &buf[0]; + if (fts_info[i].len == len && + !memcmp(&fts_info[i].buf, buf, len) && + fts_decode_vlc(&fts_buf) == fts_info[i].val && + fts_buf == &buf[len]) + ok(true, "FTS Encoded for %d bytes", fts_info[i].len); + else + ok(false, "FTS Encoded for %d bytes", fts_info[i].len); + } +} |