summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorThirunarayanan Balathandayuthapani <thiru@mariadb.com>2021-10-06 18:50:56 +0530
committerMarko Mäkelä <marko.makela@mariadb.com>2021-10-21 12:56:59 +0300
commit8ce8c269f461b7a578cf0bb5cce18f1d83ebeb9b (patch)
tree3a36a4ca48bcea74ccfbc328ecff5e9647424d71
parent6b4fad94029bb73fbb3f9d05f2dfed09e83ec31c (diff)
downloadmariadb-git-8ce8c269f461b7a578cf0bb5cce18f1d83ebeb9b.tar.gz
MDEV-19522 InnoDB commit fails when FTS_DOC_ID value is greater than 4294967295
InnoDB commit fails when consecutive FTS_DOC_ID value is greater than 4294967295. Fix is that InnoDB should remove the delta FTS_DOC_ID value limitations and fts should encode 8 byte value, remove FTS_DOC_ID_MAX_STEP variable. Replaced the fts0vlc.ic file with fts0vlc.h fts_encode_int(): Should be able to encode 10 bytes value fts_get_encoded_len(): Should get the length of the value which has 10 bytes fts_decode_vlc(): Add debug assertion to verify the maximum length allowed is 10. mach_read_uint64_little_endian(): Reads 64 bit stored in little endian format Added a unit test case which check for minimum and maximum value to do the fts encoding
-rw-r--r--mysql-test/suite/innodb_fts/r/basic.result2
-rw-r--r--mysql-test/suite/innodb_fts/r/innodb_fts_misc_1.result21
-rw-r--r--mysql-test/suite/innodb_fts/t/basic.test2
-rw-r--r--mysql-test/suite/innodb_fts/t/innodb_fts_misc_1.test18
-rw-r--r--storage/innobase/CMakeLists.txt4
-rw-r--r--storage/innobase/fts/fts0fts.cc10
-rw-r--r--storage/innobase/fts/fts0opt.cc10
-rw-r--r--storage/innobase/fts/fts0que.cc7
-rw-r--r--storage/innobase/handler/ha_innodb.cc13
-rw-r--r--storage/innobase/handler/i_s.cc18
-rw-r--r--storage/innobase/include/fts0fts.h4
-rw-r--r--storage/innobase/include/fts0types.h33
-rw-r--r--storage/innobase/include/fts0vlc.h124
-rw-r--r--storage/innobase/include/fts0vlc.ic142
-rw-r--r--storage/innobase/include/mach0data.h22
-rw-r--r--storage/innobase/row/row0mysql.cc17
-rw-r--r--storage/innobase/unittest/CMakeLists.txt22
-rw-r--r--storage/innobase/unittest/innodb_fts-t.cc52
18 files changed, 286 insertions, 235 deletions
diff --git a/mysql-test/suite/innodb_fts/r/basic.result b/mysql-test/suite/innodb_fts/r/basic.result
index b3fd94509c3..a98de60674a 100644
--- a/mysql-test/suite/innodb_fts/r/basic.result
+++ b/mysql-test/suite/innodb_fts/r/basic.result
@@ -313,9 +313,7 @@ FTS_DOC_ID
65536
131071
drop table t1;
-call mtr.add_suppression("\\[ERROR\\] InnoDB: Doc ID 20030101000000 is too big. Its difference with largest used Doc ID 0 cannot exceed or equal to 65535");
CREATE TABLE t1 (FTS_DOC_ID BIGINT UNSIGNED AUTO_INCREMENT NOT NULL PRIMARY KEY,
title VARCHAR(200), FULLTEXT(title)) ENGINE=InnoDB;
INSERT INTO t1 VALUES (NULL, NULL), (20030101000000, 20030102000000);
-ERROR HY000: Invalid InnoDB FTS Doc ID
DROP TABLE t1;
diff --git a/mysql-test/suite/innodb_fts/r/innodb_fts_misc_1.result b/mysql-test/suite/innodb_fts/r/innodb_fts_misc_1.result
index 8ec0157728c..42730d7916a 100644
--- a/mysql-test/suite/innodb_fts/r/innodb_fts_misc_1.result
+++ b/mysql-test/suite/innodb_fts/r/innodb_fts_misc_1.result
@@ -972,3 +972,24 @@ SELECT * FROM information_schema.innodb_ft_deleted;
DOC_ID
DROP TABLE t1;
SET GLOBAL innodb_ft_aux_table=DEFAULT;
+#
+# MDEV-19522 InnoDB commit fails when FTS_DOC_ID value
+# is greater than 4294967295
+#
+CREATE TABLE t1(
+FTS_DOC_ID BIGINT UNSIGNED NOT NULL AUTO_INCREMENT,
+f1 TEXT, f2 TEXT, PRIMARY KEY (FTS_DOC_ID),
+FULLTEXT KEY (f1)) ENGINE=InnoDB;
+INSERT INTO t1 VALUES (1,'txt','bbb');
+UPDATE t1 SET FTS_DOC_ID = 4294967298;
+SELECT * FROM t1 WHERE match(f1) against("txt");
+FTS_DOC_ID f1 f2
+4294967298 txt bbb
+SET @@session.insert_id = 100000000000;
+INSERT INTO t1(f1, f2) VALUES ('aaa', 'bbb');
+CREATE FULLTEXT INDEX i ON t1 (f2);
+SELECT * FROM t1 WHERE match(f2) against("bbb");
+FTS_DOC_ID f1 f2
+4294967298 txt bbb
+100000000000 aaa bbb
+DROP TABLE t1;
diff --git a/mysql-test/suite/innodb_fts/t/basic.test b/mysql-test/suite/innodb_fts/t/basic.test
index 7a5c83ffb06..53ad978a5b1 100644
--- a/mysql-test/suite/innodb_fts/t/basic.test
+++ b/mysql-test/suite/innodb_fts/t/basic.test
@@ -277,9 +277,7 @@ insert into t1(f1, f2) values(3, "This is the third record");
select FTS_DOC_ID from t1;
drop table t1;
-call mtr.add_suppression("\\[ERROR\\] InnoDB: Doc ID 20030101000000 is too big. Its difference with largest used Doc ID 0 cannot exceed or equal to 65535");
CREATE TABLE t1 (FTS_DOC_ID BIGINT UNSIGNED AUTO_INCREMENT NOT NULL PRIMARY KEY,
title VARCHAR(200), FULLTEXT(title)) ENGINE=InnoDB;
---error 182
INSERT INTO t1 VALUES (NULL, NULL), (20030101000000, 20030102000000);
DROP TABLE t1;
diff --git a/mysql-test/suite/innodb_fts/t/innodb_fts_misc_1.test b/mysql-test/suite/innodb_fts/t/innodb_fts_misc_1.test
index adc10886d66..b0bf2c669ad 100644
--- a/mysql-test/suite/innodb_fts/t/innodb_fts_misc_1.test
+++ b/mysql-test/suite/innodb_fts/t/innodb_fts_misc_1.test
@@ -942,3 +942,21 @@ SET GLOBAL innodb_ft_aux_table='test/t1';
SELECT * FROM information_schema.innodb_ft_deleted;
DROP TABLE t1;
SET GLOBAL innodb_ft_aux_table=DEFAULT;
+
+--echo #
+--echo # MDEV-19522 InnoDB commit fails when FTS_DOC_ID value
+--echo # is greater than 4294967295
+--echo #
+CREATE TABLE t1(
+ FTS_DOC_ID BIGINT UNSIGNED NOT NULL AUTO_INCREMENT,
+ f1 TEXT, f2 TEXT, PRIMARY KEY (FTS_DOC_ID),
+ FULLTEXT KEY (f1)) ENGINE=InnoDB;
+INSERT INTO t1 VALUES (1,'txt','bbb');
+UPDATE t1 SET FTS_DOC_ID = 4294967298;
+SELECT * FROM t1 WHERE match(f1) against("txt");
+SET @@session.insert_id = 100000000000;
+INSERT INTO t1(f1, f2) VALUES ('aaa', 'bbb');
+CREATE FULLTEXT INDEX i ON t1 (f2);
+SELECT * FROM t1 WHERE match(f2) against("bbb");
+# Cleanup
+DROP TABLE t1;
diff --git a/storage/innobase/CMakeLists.txt b/storage/innobase/CMakeLists.txt
index b965c4fbb45..ff646e00f39 100644
--- a/storage/innobase/CMakeLists.txt
+++ b/storage/innobase/CMakeLists.txt
@@ -188,3 +188,7 @@ IF(MSVC)
ENDIF()
ADD_SUBDIRECTORY(${CMAKE_SOURCE_DIR}/extra/mariabackup ${CMAKE_BINARY_DIR}/extra/mariabackup)
+
+IF(WITH_UNIT_TESTS)
+ ADD_SUBDIRECTORY(unittest)
+ENDIF()
diff --git a/storage/innobase/fts/fts0fts.cc b/storage/innobase/fts/fts0fts.cc
index 07077006096..bbe53f4d163 100644
--- a/storage/innobase/fts/fts0fts.cc
+++ b/storage/innobase/fts/fts0fts.cc
@@ -32,7 +32,7 @@ Full Text Search interface
#include "fts0priv.h"
#include "fts0types.h"
#include "fts0types.ic"
-#include "fts0vlc.ic"
+#include "fts0vlc.h"
#include "fts0plugin.h"
#include "dict0priv.h"
#include "dict0stats.h"
@@ -1247,7 +1247,7 @@ fts_cache_node_add_positions(
ulint enc_len;
ulint last_pos;
byte* ptr_start;
- ulint doc_id_delta;
+ doc_id_t doc_id_delta;
#ifdef UNIV_DEBUG
if (cache) {
@@ -1258,7 +1258,7 @@ fts_cache_node_add_positions(
ut_ad(doc_id >= node->last_doc_id);
/* Calculate the space required to store the ilist. */
- doc_id_delta = (ulint)(doc_id - node->last_doc_id);
+ doc_id_delta = doc_id - node->last_doc_id;
enc_len = fts_get_encoded_len(doc_id_delta);
last_pos = 0;
@@ -1307,14 +1307,14 @@ fts_cache_node_add_positions(
ptr_start = ptr;
/* Encode the new fragment. */
- ptr += fts_encode_int(doc_id_delta, ptr);
+ ptr = fts_encode_int(doc_id_delta, ptr);
last_pos = 0;
for (i = 0; i < ib_vector_size(positions); i++) {
ulint pos = *(static_cast<ulint*>(
ib_vector_get(positions, i)));
- ptr += fts_encode_int(pos - last_pos, ptr);
+ ptr = fts_encode_int(pos - last_pos, ptr);
last_pos = pos;
}
diff --git a/storage/innobase/fts/fts0opt.cc b/storage/innobase/fts/fts0opt.cc
index c4cbbfafff4..2d506a757a0 100644
--- a/storage/innobase/fts/fts0opt.cc
+++ b/storage/innobase/fts/fts0opt.cc
@@ -36,6 +36,7 @@ Completed 2011/7/10 Sunny and Jimmy Yang
#include "ut0list.h"
#include "zlib.h"
#include "fts0opt.h"
+#include "fts0vlc.h"
/** The FTS optimize thread's work queue. */
ib_wqueue_t* fts_optimize_wq;
@@ -1116,7 +1117,7 @@ fts_optimize_encode_node(
ulint pos_enc_len;
doc_id_t doc_id_delta;
dberr_t error = DB_SUCCESS;
- byte* src = enc->src_ilist_ptr;
+ const byte* src = enc->src_ilist_ptr;
if (node->first_doc_id == 0) {
ut_a(node->last_doc_id == 0);
@@ -1173,7 +1174,7 @@ fts_optimize_encode_node(
/* Encode the doc id. Cast to ulint, the delta should be small and
therefore no loss of precision. */
- dst += fts_encode_int((ulint) doc_id_delta, dst);
+ dst = fts_encode_int(doc_id_delta, dst);
/* Copy the encoded pos array. */
memcpy(dst, src, pos_enc_len);
@@ -1220,7 +1221,8 @@ fts_optimize_node(
doc_id_t delta;
doc_id_t del_doc_id = FTS_NULL_DOC_ID;
- delta = fts_decode_vlc(&enc->src_ilist_ptr);
+ delta = fts_decode_vlc(
+ (const byte**)&enc->src_ilist_ptr);
test_again:
/* Check whether the doc id is in the delete list, if
@@ -1248,7 +1250,7 @@ test_again:
/* Skip the entries for this document. */
while (*enc->src_ilist_ptr) {
- fts_decode_vlc(&enc->src_ilist_ptr);
+ fts_decode_vlc((const byte**)&enc->src_ilist_ptr);
}
/* Skip the end of word position marker. */
diff --git a/storage/innobase/fts/fts0que.cc b/storage/innobase/fts/fts0que.cc
index b4c72e53afe..965c79bfaa5 100644
--- a/storage/innobase/fts/fts0que.cc
+++ b/storage/innobase/fts/fts0que.cc
@@ -34,6 +34,7 @@ Completed 2011/7/10 Sunny and Jimmy Yang
#include "fts0pars.h"
#include "fts0types.h"
#include "fts0plugin.h"
+#include "fts0vlc.h"
#include <iomanip>
#include <vector>
@@ -3224,7 +3225,7 @@ fts_query_filter_doc_ids(
ulint len, /*!< in: doc id ilist size */
ibool calc_doc_count) /*!< in: whether to remember doc count */
{
- byte* ptr = static_cast<byte*>(data);
+ const byte* ptr = static_cast<byte*>(data);
doc_id_t doc_id = 0;
ulint decoded = 0;
ib_rbt_t* doc_freqs = word_freq->doc_freqs;
@@ -3234,8 +3235,8 @@ fts_query_filter_doc_ids(
ulint freq = 0;
fts_doc_freq_t* doc_freq;
fts_match_t* match = NULL;
- ulint last_pos = 0;
- ulint pos = fts_decode_vlc(&ptr);
+ doc_id_t last_pos = 0;
+ doc_id_t pos = fts_decode_vlc(&ptr);
/* Some sanity checks. */
if (doc_id == 0) {
diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc
index 10e5cbe216f..91d245ad0e2 100644
--- a/storage/innobase/handler/ha_innodb.cc
+++ b/storage/innobase/handler/ha_innodb.cc
@@ -8543,8 +8543,7 @@ calc_row_difference(
&& prebuilt->table->fts
&& innobase_strcasecmp(
field->field_name, FTS_DOC_ID_COL_NAME) == 0) {
- doc_id = (doc_id_t) mach_read_from_n_little_endian(
- n_ptr, 8);
+ doc_id = mach_read_uint64_little_endian(n_ptr);
if (doc_id == 0) {
return(DB_FTS_INVALID_DOCID);
}
@@ -8787,16 +8786,6 @@ calc_row_difference(
<< innodb_table->name;
return(DB_FTS_INVALID_DOCID);
- } else if ((doc_id
- - prebuilt->table->fts->cache->next_doc_id)
- >= FTS_DOC_ID_MAX_STEP) {
-
- ib::warn() << "Doc ID " << doc_id << " is too"
- " big. Its difference with largest"
- " Doc ID used " << prebuilt->table->fts
- ->cache->next_doc_id - 1
- << " cannot exceed or equal to "
- << FTS_DOC_ID_MAX_STEP;
}
diff --git a/storage/innobase/handler/i_s.cc b/storage/innobase/handler/i_s.cc
index 03c0efff027..1111d974ad0 100644
--- a/storage/innobase/handler/i_s.cc
+++ b/storage/innobase/handler/i_s.cc
@@ -58,6 +58,7 @@ Modified Dec 29, 2014 Jan Lindström (Added sys_semaphore_waits)
#include "fil0fil.h"
#include "fil0crypt.h"
#include "dict0crea.h"
+#include "fts0vlc.h"
/** The latest successfully looked up innodb_fts_aux_table */
UNIV_INTERN table_id_t innodb_ft_aux_table_id;
@@ -2775,7 +2776,7 @@ i_s_fts_index_cache_fill_one_index(
/* Decrypt the ilist, and display Dod ID and word position */
for (ulint i = 0; i < ib_vector_size(word->nodes); i++) {
fts_node_t* node;
- byte* ptr;
+ const byte* ptr;
ulint decoded = 0;
doc_id_t doc_id = 0;
@@ -2785,13 +2786,11 @@ i_s_fts_index_cache_fill_one_index(
ptr = node->ilist;
while (decoded < node->ilist_size) {
- ulint pos = fts_decode_vlc(&ptr);
- doc_id += pos;
+ doc_id += fts_decode_vlc(&ptr);
/* Get position info */
while (*ptr) {
- pos = fts_decode_vlc(&ptr);
OK(field_store_string(
fields[I_S_FTS_WORD],
@@ -2812,7 +2811,7 @@ i_s_fts_index_cache_fill_one_index(
doc_id, true));
OK(fields[I_S_FTS_ILIST_DOC_POS]->store(
- pos, true));
+ fts_decode_vlc(&ptr), true));
OK(schema_table_store_record(
thd, table));
@@ -3146,7 +3145,7 @@ i_s_fts_index_table_fill_one_fetch(
/* Decrypt the ilist, and display Dod ID and word position */
for (ulint i = 0; i < ib_vector_size(word->nodes); i++) {
fts_node_t* node;
- byte* ptr;
+ const byte* ptr;
ulint decoded = 0;
doc_id_t doc_id = 0;
@@ -3156,13 +3155,10 @@ i_s_fts_index_table_fill_one_fetch(
ptr = node->ilist;
while (decoded < node->ilist_size) {
- ulint pos = fts_decode_vlc(&ptr);
-
- doc_id += pos;
+ doc_id += fts_decode_vlc(&ptr);
/* Get position info */
while (*ptr) {
- pos = fts_decode_vlc(&ptr);
OK(field_store_string(
fields[I_S_FTS_WORD],
@@ -3181,7 +3177,7 @@ i_s_fts_index_table_fill_one_fetch(
longlong(doc_id), true));
OK(fields[I_S_FTS_ILIST_DOC_POS]->store(
- pos, true));
+ fts_decode_vlc(&ptr), true));
OK(schema_table_store_record(
thd, table));
diff --git a/storage/innobase/include/fts0fts.h b/storage/innobase/include/fts0fts.h
index 84d8ccd26ef..dfac5117c17 100644
--- a/storage/innobase/include/fts0fts.h
+++ b/storage/innobase/include/fts0fts.h
@@ -96,10 +96,6 @@ those defined in mysql file ft_global.h */
/** Threshold where our optimize thread automatically kicks in */
#define FTS_OPTIMIZE_THRESHOLD 10000000
-/** Threshold to avoid exhausting of doc ids. Consecutive doc id difference
-should not exceed FTS_DOC_ID_MAX_STEP */
-#define FTS_DOC_ID_MAX_STEP 65535
-
/** Maximum possible Fulltext word length in bytes (assuming mbmaxlen=4) */
#define FTS_MAX_WORD_LEN (HA_FT_MAXCHARLEN * 4)
diff --git a/storage/innobase/include/fts0types.h b/storage/innobase/include/fts0types.h
index f5760a16c0e..21d32c7d313 100644
--- a/storage/innobase/include/fts0types.h
+++ b/storage/innobase/include/fts0types.h
@@ -315,16 +315,6 @@ int fts_doc_id_cmp(
const void* p2); /*!< in: id2 */
/******************************************************************//**
-Decode and return the integer that was encoded using our VLC scheme.*/
-UNIV_INLINE
-ulint
-fts_decode_vlc(
-/*===========*/
- /*!< out: value decoded */
- byte** ptr); /*!< in: ptr to decode from, this ptr is
- incremented by the number of bytes decoded */
-
-/******************************************************************//**
Duplicate a string. */
UNIV_INLINE
void
@@ -339,28 +329,6 @@ fts_string_dup(
mem_heap_t* heap); /*!< in: heap to use */
/******************************************************************//**
-Return length of val if it were encoded using our VLC scheme. */
-UNIV_INLINE
-ulint
-fts_get_encoded_len(
-/*================*/
- /*!< out: length of value
- encoded, in bytes */
- ulint val); /*!< in: value to encode */
-
-/******************************************************************//**
-Encode an integer using our VLC scheme and return the length in bytes. */
-UNIV_INLINE
-ulint
-fts_encode_int(
-/*===========*/
- /*!< out: length of value
- encoded, in bytes */
- ulint val, /*!< in: value to encode */
- byte* buf); /*!< in: buffer, must have
- enough space */
-
-/******************************************************************//**
Get the selected FTS aux INDEX suffix. */
UNIV_INLINE
const char*
@@ -381,6 +349,5 @@ fts_select_index(
ulint len);
#include "fts0types.ic"
-#include "fts0vlc.ic"
#endif /* INNOBASE_FTS0TYPES_H */
diff --git a/storage/innobase/include/fts0vlc.h b/storage/innobase/include/fts0vlc.h
new file mode 100644
index 00000000000..d6e6037777e
--- /dev/null
+++ b/storage/innobase/include/fts0vlc.h
@@ -0,0 +1,124 @@
+/**
+
+Copyright (c) 2021, MariaDB Corporation.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
+**/
+/**
+@file include/fts0vlc.h
+Full text variable length integer encoding/decoding.
+
+Created 2021-10-19 Thirunarayanan Balathandayuthapani
+**/
+
+/** Return length of val if it were encoded using our VLC scheme.
+@param val value to encode
+@return length of value encoded, in bytes */
+inline size_t fts_get_encoded_len(doc_id_t val)
+{
+ if (val < static_cast<doc_id_t>(1) << 7)
+ return 1;
+ if (val < static_cast<doc_id_t>(1) << 14)
+ return 2;
+ if (val < static_cast<doc_id_t>(1) << 21)
+ return 3;
+ if (val < static_cast<doc_id_t>(1) << 28)
+ return 4;
+ if (val < static_cast<doc_id_t>(1) << 35)
+ return 5;
+ if (val < static_cast<doc_id_t>(1) << 42)
+ return 6;
+ if (val < static_cast<doc_id_t>(1) << 49)
+ return 7;
+ if (val < static_cast<doc_id_t>(1) << 56)
+ return 8;
+ if (val < static_cast<doc_id_t>(1) << 63)
+ return 9;
+ return 10;
+}
+
+/** Encode an integer using our VLC scheme and return the
+length in bytes.
+@param val value to encode
+@param buf buffer, must have enough space
+@return length of value encoded, in bytes */
+inline byte *fts_encode_int(doc_id_t val, byte *buf)
+{
+ if (val < static_cast<doc_id_t>(1) << 7)
+ goto add_1;
+ if (val < static_cast<doc_id_t>(1) << 14)
+ goto add_2;
+ if (val < static_cast<doc_id_t>(1) << 21)
+ goto add_3;
+ if (val < static_cast<doc_id_t>(1) << 28)
+ goto add_4;
+ if (val < static_cast<doc_id_t>(1) << 35)
+ goto add_5;
+ if (val < static_cast<doc_id_t>(1) << 42)
+ goto add_6;
+ if (val < static_cast<doc_id_t>(1) << 49)
+ goto add_7;
+ if (val < static_cast<doc_id_t>(1) << 56)
+ goto add_8;
+ if (val < static_cast<doc_id_t>(1) << 63)
+ goto add_9;
+
+ *buf++= static_cast<byte>(val >> 63);
+add_9:
+ *buf++= static_cast<byte>(val >> 56) & 0x7F;
+add_8:
+ *buf++= static_cast<byte>(val >> 49) & 0x7F;
+add_7:
+ *buf++= static_cast<byte>(val >> 42) & 0x7F;
+add_6:
+ *buf++= static_cast<byte>(val >> 35) & 0x7F;
+add_5:
+ *buf++= static_cast<byte>(val >> 28) & 0x7F;
+add_4:
+ *buf++= static_cast<byte>(val >> 21) & 0x7F;
+add_3:
+ *buf++= static_cast<byte>(val >> 14) & 0x7F;
+add_2:
+ *buf++= static_cast<byte>(val >> 7) & 0x7F;
+add_1:
+ *buf++= static_cast<byte>(val) | 0x80;
+ return buf;
+}
+
+/** Decode and return the integer that was encoded using
+our VLC scheme.
+@param ptr pointer to decode from, this ptr is
+ incremented by the number of bytes decoded
+@return value decoded */
+inline doc_id_t fts_decode_vlc(const byte **ptr)
+{
+ ut_d(const byte *const start= *ptr);
+ ut_ad(*start);
+
+ doc_id_t val= 0;
+ for (;;)
+ {
+ byte b= *(*ptr)++;
+ val|= (b & 0x7F);
+
+ /* High-bit on means "last byte in the encoded integer". */
+ if (b & 0x80)
+ break;
+ ut_ad(val < static_cast<doc_id_t>(1) << (64 - 7));
+ val <<= 7;
+ }
+
+ ut_ad(*ptr - start <= 10);
+
+ return(val);
+}
diff --git a/storage/innobase/include/fts0vlc.ic b/storage/innobase/include/fts0vlc.ic
deleted file mode 100644
index 75d8535057e..00000000000
--- a/storage/innobase/include/fts0vlc.ic
+++ /dev/null
@@ -1,142 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 2007, 2011, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/******************************************************************//**
-@file include/fts0vlc.ic
-Full text variable length integer encoding/decoding.
-
-Created 2007-03-27 Sunny Bains
-*******************************************************/
-
-#ifndef INNOBASE_FTS0VLC_IC
-#define INNOBASE_FTS0VLC_IC
-
-#include "fts0types.h"
-
-/******************************************************************//**
-Return length of val if it were encoded using our VLC scheme.
-FIXME: We will need to be able encode 8 bytes value
-@return length of value encoded, in bytes */
-UNIV_INLINE
-ulint
-fts_get_encoded_len(
-/*================*/
- ulint val) /* in: value to encode */
-{
- if (val <= 127) {
- return(1);
- } else if (val <= 16383) {
- return(2);
- } else if (val <= 2097151) {
- return(3);
- } else if (val <= 268435455) {
- return(4);
- } else {
- /* Possibly we should care that on 64-bit machines ulint can
- contain values that we can't encode in 5 bytes, but
- fts_encode_int doesn't handle them either so it doesn't much
- matter. */
-
- return(5);
- }
-}
-
-/******************************************************************//**
-Encode an integer using our VLC scheme and return the length in bytes.
-@return length of value encoded, in bytes */
-UNIV_INLINE
-ulint
-fts_encode_int(
-/*===========*/
- ulint val, /* in: value to encode */
- byte* buf) /* in: buffer, must have enough space */
-{
- ulint len;
-
- if (val <= 127) {
- *buf = (byte) val;
-
- len = 1;
- } else if (val <= 16383) {
- *buf++ = (byte)(val >> 7);
- *buf = (byte)(val & 0x7F);
-
- len = 2;
- } else if (val <= 2097151) {
- *buf++ = (byte)(val >> 14);
- *buf++ = (byte)((val >> 7) & 0x7F);
- *buf = (byte)(val & 0x7F);
-
- len = 3;
- } else if (val <= 268435455) {
- *buf++ = (byte)(val >> 21);
- *buf++ = (byte)((val >> 14) & 0x7F);
- *buf++ = (byte)((val >> 7) & 0x7F);
- *buf = (byte)(val & 0x7F);
-
- len = 4;
- } else {
- /* Best to keep the limitations of the 32/64 bit versions
- identical, at least for the time being. */
- ut_ad(val <= 4294967295u);
-
- *buf++ = (byte)(val >> 28);
- *buf++ = (byte)((val >> 21) & 0x7F);
- *buf++ = (byte)((val >> 14) & 0x7F);
- *buf++ = (byte)((val >> 7) & 0x7F);
- *buf = (byte)(val & 0x7F);
-
- len = 5;
- }
-
- /* High-bit on means "last byte in the encoded integer". */
- *buf |= 0x80;
-
- return(len);
-}
-
-/******************************************************************//**
-Decode and return the integer that was encoded using our VLC scheme.
-@return value decoded */
-UNIV_INLINE
-ulint
-fts_decode_vlc(
-/*===========*/
- byte** ptr) /* in: ptr to decode from, this ptr is
- incremented by the number of bytes decoded */
-{
- ulint val = 0;
-
- for (;;) {
- byte b = **ptr;
-
- ++*ptr;
- val |= (b & 0x7F);
-
- /* High-bit on means "last byte in the encoded integer". */
- if (b & 0x80) {
- break;
- } else {
- val <<= 7;
- }
- }
-
- return(val);
-}
-
-#endif
diff --git a/storage/innobase/include/mach0data.h b/storage/innobase/include/mach0data.h
index 8141c8a91e0..860ef20b8bd 100644
--- a/storage/innobase/include/mach0data.h
+++ b/storage/innobase/include/mach0data.h
@@ -316,6 +316,28 @@ mach_read_from_n_little_endian(
const byte* buf, /*!< in: from where to read */
ulint buf_size) /*!< in: from how many bytes to read */
MY_ATTRIBUTE((warn_unused_result));
+
+
+/** Reads a 64 bit stored in big endian format
+@param buf From where to read
+@return uint64_t */
+UNIV_INLINE
+uint64_t
+mach_read_uint64_little_endian(const byte* buf)
+{
+#ifdef WORDS_BIGENDIAN
+ return
+ uint64_t(buf[0]) | uint64_t(buf[1]) << 8 |
+ uint64_t(buf[2]) << 16 | uint64_t(buf[3]) << 24 |
+ uint64_t(buf[4]) << 32 | uint64_t(buf[5]) << 40 |
+ uint64_t(buf[6]) << 48 | uint64_t(buf[7]) << 56;
+#else
+ uint64_t n;
+ memcpy(&n, buf, sizeof(uint64_t));
+ return n;
+#endif
+}
+
/*********************************************************//**
Writes a ulint in the little-endian format. */
UNIV_INLINE
diff --git a/storage/innobase/row/row0mysql.cc b/storage/innobase/row/row0mysql.cc
index c2f9186d408..6445f67f3c2 100644
--- a/storage/innobase/row/row0mysql.cc
+++ b/storage/innobase/row/row0mysql.cc
@@ -1465,23 +1465,6 @@ error_exit:
trx->error_state = DB_FTS_INVALID_DOCID;
goto error_exit;
}
-
- /* Difference between Doc IDs are restricted within
- 4 bytes integer. See fts_get_encoded_len(). Consecutive
- doc_ids difference should not exceed
- FTS_DOC_ID_MAX_STEP value. */
-
- if (doc_id - next_doc_id >= FTS_DOC_ID_MAX_STEP) {
- ib::error() << "Doc ID " << doc_id
- << " is too big. Its difference with"
- " largest used Doc ID "
- << next_doc_id - 1 << " cannot"
- " exceed or equal to "
- << FTS_DOC_ID_MAX_STEP;
- err = DB_FTS_INVALID_DOCID;
- trx->error_state = DB_FTS_INVALID_DOCID;
- goto error_exit;
- }
}
if (table->skip_alter_undo) {
diff --git a/storage/innobase/unittest/CMakeLists.txt b/storage/innobase/unittest/CMakeLists.txt
new file mode 100644
index 00000000000..df98cddf73e
--- /dev/null
+++ b/storage/innobase/unittest/CMakeLists.txt
@@ -0,0 +1,22 @@
+# Copyright (c) 2021, MariaDB Corporation.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; version 2 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA
+
+INCLUDE_DIRECTORIES(${CMAKE_SOURCE_DIR}/include
+ ${CMAKE_SOURCE_DIR}/unittest/mytap
+ ${CMAKE_SOURCE_DIR}/storage/innobase/include)
+ADD_EXECUTABLE(innodb_fts-t innodb_fts-t.cc)
+TARGET_LINK_LIBRARIES(innodb_fts-t mysys mytap)
+ADD_DEPENDENCIES(innodb_fts-t GenError)
+MY_ADD_TEST(innodb_fts)
diff --git a/storage/innobase/unittest/innodb_fts-t.cc b/storage/innobase/unittest/innodb_fts-t.cc
new file mode 100644
index 00000000000..57585e337c2
--- /dev/null
+++ b/storage/innobase/unittest/innodb_fts-t.cc
@@ -0,0 +1,52 @@
+#include "tap.h"
+#include "fts0fts.h"
+#include "fts0vlc.h"
+
+struct fts_encode_info
+{
+ const byte buf[10];
+ int32_t len;
+ doc_id_t val;
+};
+
+/* Contains fts encoding min & max value for each length bytes */
+static const fts_encode_info fts_info[]=
+{
+ {{0x80}, 1, 0},
+ {{0xFF}, 1, (1 << 7) - 1},
+ {{0x01, 0x80}, 2, 1 << 7},
+ {{0x7F, 0XFF}, 2, (1 << 14) - 1},
+ {{0x01, 0x00, 0x80}, 3, 1 << 14},
+ {{0x7F, 0X7F, 0XFF}, 3, (1 << 21) - 1},
+ {{0x01, 0x00, 0x00, 0x80}, 4, 1 << 21},
+ {{0x7F, 0X7F, 0X7F, 0xFF}, 4, (1 << 28) - 1},
+ {{0x01, 0x00, 0x00, 0x00, 0x80}, 5, 1 << 28},
+ {{0x7F, 0X7F, 0X7F, 0x7F, 0xFF}, 5, (1ULL << 35) - 1},
+ {{0x01, 0x00, 0x00, 0x00, 0x00, 0x80}, 6, 1ULL << 35},
+ {{0x7F, 0X7F, 0X7F, 0x7F, 0x7F, 0xFF}, 6, (1ULL << 42) - 1},
+ {{0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80}, 7, 1ULL << 42},
+ {{0x7F, 0X7F, 0X7F, 0x7F, 0x7F, 0x7F, 0XFF}, 7, (1ULL << 49) - 1},
+ {{0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80}, 8, 1ULL << 49},
+ {{0x7F, 0X7F, 0X7F, 0x7F, 0x7F, 0x7F, 0X7F, 0XFF}, 8, (1ULL << 56) -1},
+ {{0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80}, 9, 1ULL << 56},
+ {{0x7F, 0X7F, 0X7F, 0x7F, 0x7F, 0x7F, 0X7F, 0x7F, 0XFF}, 9, (1ULL << 63) -1},
+ {{0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80}, 10, 1ULL << 63},
+ {{0x01, 0X7F, 0X7F, 0x7F, 0x7F, 0x7F, 0X7F, 0x7F, 0x7F, 0xFF}, 10, ~0ULL}
+};
+
+int main(int, char**)
+{
+ for (int i= array_elements(fts_info); i--;)
+ {
+ byte buf[10];
+ const byte* fts_buf= buf;
+ int32_t len= fts_encode_int(fts_info[i].val, buf) - &buf[0];
+ if (fts_info[i].len == len &&
+ !memcmp(&fts_info[i].buf, buf, len) &&
+ fts_decode_vlc(&fts_buf) == fts_info[i].val &&
+ fts_buf == &buf[len])
+ ok(true, "FTS Encoded for %d bytes", fts_info[i].len);
+ else
+ ok(false, "FTS Encoded for %d bytes", fts_info[i].len);
+ }
+}