summaryrefslogtreecommitdiff
path: root/innobase/row/row0sel.c
diff options
context:
space:
mode:
Diffstat (limited to 'innobase/row/row0sel.c')
-rw-r--r--innobase/row/row0sel.c311
1 files changed, 217 insertions, 94 deletions
diff --git a/innobase/row/row0sel.c b/innobase/row/row0sel.c
index 97a69f76eaa..81bbf5053c0 100644
--- a/innobase/row/row0sel.c
+++ b/innobase/row/row0sel.c
@@ -65,41 +65,50 @@ row_sel_sec_rec_is_for_clust_rec(
rec_t* sec_rec, /* in: secondary index record */
dict_index_t* sec_index, /* in: secondary index */
rec_t* clust_rec, /* in: clustered index record */
- dict_index_t* clust_index __attribute__((unused)))
- /* in: clustered index */
+ dict_index_t* clust_index) /* in: clustered index */
{
- dict_col_t* col;
- byte* sec_field;
- ulint sec_len;
- byte* clust_field;
- ulint clust_len;
- ulint n;
- ulint i;
+ dict_field_t* ifield;
+ dict_col_t* col;
+ byte* sec_field;
+ ulint sec_len;
+ byte* clust_field;
+ ulint clust_len;
+ ulint n;
+ ulint i;
- n = dict_index_get_n_ordering_defined_by_user(sec_index);
+ UT_NOT_USED(clust_index);
- for (i = 0; i < n; i++) {
- col = dict_field_get_col(
- dict_index_get_nth_field(sec_index, i));
+ n = dict_index_get_n_ordering_defined_by_user(sec_index);
- clust_field = rec_get_nth_field(clust_rec,
- dict_col_get_clust_pos(col),
- &clust_len);
- sec_field = rec_get_nth_field(sec_rec, i, &sec_len);
+ for (i = 0; i < n; i++) {
+ ifield = dict_index_get_nth_field(sec_index, i);
+ col = dict_field_get_col(ifield);
+
+ clust_field = rec_get_nth_field(clust_rec,
+ dict_col_get_clust_pos(col),
+ &clust_len);
+ sec_field = rec_get_nth_field(sec_rec, i, &sec_len);
- if (sec_len != clust_len) {
+ if (ifield->prefix_len > 0
+ && clust_len != UNIV_SQL_NULL
+ && clust_len > ifield->prefix_len) {
- return(FALSE);
+ clust_len = ifield->prefix_len;
}
- if (0 != cmp_data_data(dict_col_get_type(col),
- clust_field, clust_len,
- sec_field, sec_len)) {
- return(FALSE);
- }
- }
+ if (sec_len != clust_len) {
- return(TRUE);
+ return(FALSE);
+ }
+
+ if (0 != cmp_data_data(dict_col_get_type(col),
+ clust_field, clust_len,
+ sec_field, sec_len)) {
+ return(FALSE);
+ }
+ }
+
+ return(TRUE);
}
/*************************************************************************
@@ -600,13 +609,35 @@ row_sel_get_clust_rec(
clust_rec = btr_pcur_get_rec(&(plan->clust_pcur));
- ut_ad(page_rec_is_user_rec(clust_rec));
+ /* Note: only if the search ends up on a non-infimum record is the
+ low_match value the real match to the search tuple */
+
+ if (!page_rec_is_user_rec(clust_rec)
+ || btr_pcur_get_low_match(&(plan->clust_pcur))
+ < dict_index_get_n_unique(index)) {
+
+ ut_a(rec_get_deleted_flag(rec));
+ ut_a(node->read_view);
+
+ /* In a rare case it is possible that no clust rec is found
+ for a delete-marked secondary index record: if in row0umod.c
+ in row_undo_mod_remove_clust_low() we have already removed
+ the clust rec, while purge is still cleaning and removing
+ secondary index records associated with earlier versions of
+ the clustered index record. In that case we know that the
+ clustered index record did not exist in the read view of
+ trx. */
+
+ clust_rec = NULL;
+
+ goto func_exit;
+ }
if (!node->read_view) {
/* Try to place a lock on the index record */
err = lock_clust_rec_read_check_and_lock(0, clust_rec, index,
- node->row_lock_mode, LOCK_ORDINARY, thr);
+ node->row_lock_mode, LOCK_ORDINARY, thr);
if (err != DB_SUCCESS) {
return(err);
@@ -656,13 +687,14 @@ row_sel_get_clust_rec(
*out_rec = clust_rec;
return(DB_SUCCESS);
- }
+ }
}
/* Fetch the columns needed in test conditions */
row_sel_fetch_columns(index, clust_rec,
UT_LIST_GET_FIRST(plan->columns));
+func_exit:
*out_rec = clust_rec;
return(DB_SUCCESS);
@@ -1244,6 +1276,8 @@ rec_loop:
/* PHASE 3: Get previous version in a consistent read */
+ cons_read_requires_clust_rec = FALSE;
+
if (consistent_read) {
/* This is a non-locking consistent read: if necessary, fetch
a previous version of the record */
@@ -1850,9 +1884,11 @@ row_printf_step(
}
/********************************************************************
-Converts a key value stored in MySQL format to an Innobase dtuple.
-The last field of the key value may be just a prefix of a fixed length
-field: hence the parameter key_len. */
+Converts a key value stored in MySQL format to an Innobase dtuple. The last
+field of the key value may be just a prefix of a fixed length field: hence
+the parameter key_len. But currently we do not allow search keys where the
+last field is only a prefix of the full key field len and print a warning if
+such appears. */
void
row_sel_convert_mysql_key_to_innobase(
@@ -1863,17 +1899,24 @@ row_sel_convert_mysql_key_to_innobase(
to index! */
byte* buf, /* in: buffer to use in field
conversions */
+ ulint buf_len, /* in: buffer length */
dict_index_t* index, /* in: index of the key value */
byte* key_ptr, /* in: MySQL key value */
ulint key_len) /* in: MySQL key value length */
{
+ byte* original_buf = buf;
+ dict_field_t* field;
dfield_t* dfield;
- ulint offset;
- ulint len;
+ ulint data_offset;
+ ulint data_len;
+ ulint data_field_len;
+ ibool is_null;
byte* key_end;
ulint n_fields = 0;
+ ulint type;
- UT_NOT_USED(index);
+ /* For documentation of the key value storage format in MySQL, see
+ ha_innobase::store_key_val_for_row() in ha_innodb.cc. */
key_end = key_ptr + key_len;
@@ -1882,11 +1925,14 @@ row_sel_convert_mysql_key_to_innobase(
dtuple_set_n_fields(tuple, ULINT_MAX);
dfield = dtuple_get_nth_field(tuple, 0);
+ field = dict_index_get_nth_field(index, 0);
if (dfield_get_type(dfield)->mtype == DATA_SYS) {
- /* A special case: we are looking for a position in a
- generated clustered index: the first and the only
- ordering column is ROW_ID */
+ /* A special case: we are looking for a position in the
+ generated clustered index which InnoDB automatically added
+ to a table with no primary key: the first and the only
+ ordering column is ROW_ID which InnoDB stored to the key_ptr
+ buffer. */
ut_a(key_len == DATA_ROW_ID_LEN);
@@ -1897,70 +1943,114 @@ row_sel_convert_mysql_key_to_innobase(
return;
}
- while (key_ptr < key_end) {
- offset = 0;
- len = dfield_get_type(dfield)->len;
+ while (key_ptr < key_end) {
- n_fields++;
+ ut_a(dict_col_get_type(field->col)->mtype
+ == dfield_get_type(dfield)->mtype);
+
+ data_offset = 0;
+ is_null = FALSE;
if (!(dfield_get_type(dfield)->prtype & DATA_NOT_NULL)) {
/* The first byte in the field tells if this is
an SQL NULL value */
- offset = 1;
+ data_offset = 1;
- if (*key_ptr != 0) {
+ if (*key_ptr != 0) {
dfield_set_data(dfield, NULL, UNIV_SQL_NULL);
- goto next_part;
+ is_null = TRUE;
}
}
- row_mysql_store_col_in_innobase_format(
- dfield, buf, key_ptr + offset, len,
- dfield_get_type(dfield)->mtype,
+ type = dfield_get_type(dfield)->mtype;
+
+ /* Calculate data length and data field total length */
+
+ if (type == DATA_BLOB) {
+ /* The key field is a column prefix of a BLOB or
+ TEXT type column */
+
+ ut_a(field->prefix_len > 0);
+
+ /* MySQL stores the actual data length to the first 2
+ bytes after the optional SQL NULL marker byte. The
+ storage format is little-endian. */
+
+ /* There are no key fields > 255 bytes currently in
+ MySQL */
+ if (key_ptr[data_offset + 1] != 0) {
+ ut_print_timestamp(stderr);
+ fprintf(stderr,
+" InnoDB: Error: BLOB or TEXT prefix > 255 bytes in query to table %s\n",
+ index->table_name);
+ }
+
+ data_len = key_ptr[data_offset];
+ data_field_len = data_offset + 2 + field->prefix_len;
+ data_offset += 2;
+
+ type = DATA_CHAR; /* now that we know the length, we
+ store the column value like it would
+ be a fixed char field */
+ } else if (field->prefix_len > 0) {
+ data_len = field->prefix_len;
+ data_field_len = data_offset + data_len;
+ } else {
+ data_len = dfield_get_type(dfield)->len;
+ data_field_len = data_offset + data_len;
+ }
+
+ /* Storing may use at most data_len bytes of buf */
+
+ if (!is_null) {
+ row_mysql_store_col_in_innobase_format(
+ dfield, buf, key_ptr + data_offset,
+ data_len, type,
dfield_get_type(dfield)->prtype
& DATA_UNSIGNED);
- next_part:
- key_ptr += (offset + len);
+ buf += data_len;
+ }
+
+ key_ptr += data_field_len;
if (key_ptr > key_end) {
- /* The last field in key was not a complete
- field but a prefix of it.
+ /* The last field in key was not a complete key field
+ but a prefix of it.
- Print a warning about this! HA_READ_PREFIX_LAST
- does not currently work in InnoDB with partial-field
- key value prefixes. Since MySQL currently uses a
- padding trick to calculate LIKE 'abc%' type queries
- there should never be partial-field prefixes
- in searches. */
+ Print a warning about this! HA_READ_PREFIX_LAST does
+ not currently work in InnoDB with partial-field key
+ value prefixes. Since MySQL currently uses a padding
+ trick to calculate LIKE 'abc%' type queries there
+ should never be partial-field prefixes in searches. */
ut_print_timestamp(stderr);
fprintf(stderr,
" InnoDB: Warning: using a partial-field key prefix in search\n");
- ut_ad(dfield_get_len(dfield) != UNIV_SQL_NULL);
-
- dfield_set_data(dfield, buf,
- len - (ulint)(key_ptr - key_end));
+ if (!is_null) {
+ dfield->len -= (ulint)(key_ptr - key_end);
+ }
}
- buf += len;
-
+ n_fields++;
+ field++;
dfield++;
}
- /* We set the length of tuple to n_fields: we assume that
- the memory area allocated for it is big enough (usually
- bigger than n_fields). */
+ ut_a(buf <= original_buf + buf_len);
+
+ /* We set the length of tuple to n_fields: we assume that the memory
+ area allocated for it is big enough (usually bigger than n_fields). */
dtuple_set_n_fields(tuple, n_fields);
}
/******************************************************************
Stores the row id to the prebuilt struct. */
-UNIV_INLINE
+static
void
row_sel_store_row_id_to_prebuilt(
/*=============================*/
@@ -1970,11 +2060,22 @@ row_sel_store_row_id_to_prebuilt(
{
byte* data;
ulint len;
+ char err_buf[1000];
data = rec_get_nth_field(index_rec,
dict_index_get_sys_col_pos(index, DATA_ROW_ID), &len);
- ut_a(len == DATA_ROW_ID_LEN);
+ if (len != DATA_ROW_ID_LEN) {
+ rec_sprintf(err_buf, 900, index_rec);
+
+ fprintf(stderr,
+"InnoDB: Error: Row id field is wrong length %lu in table %s index %s\n"
+"InnoDB: Field number %lu, record:\n%s\n",
+ len, index->table_name, index->name,
+ dict_index_get_sys_col_pos(index, DATA_ROW_ID),
+ err_buf);
+ ut_a(0);
+ }
ut_memcpy(prebuilt->row_id, data, len);
}
@@ -2210,7 +2311,10 @@ row_sel_get_clust_rec_for_mysql(
/* out: DB_SUCCESS or error code */
row_prebuilt_t* prebuilt,/* in: prebuilt struct in the handle */
dict_index_t* sec_index,/* in: secondary index where rec resides */
- rec_t* rec, /* in: record in a non-clustered index */
+ rec_t* rec, /* in: record in a non-clustered index; if
+ this is a locking read, then rec is not
+ allowed to be delete-marked, and that would
+ not make sense either */
que_thr_t* thr, /* in: query thread */
rec_t** out_rec,/* out: clustered record or an old version of
it, NULL if the old version did not exist
@@ -2226,7 +2330,7 @@ row_sel_get_clust_rec_for_mysql(
ulint err;
trx_t* trx;
char err_buf[1000];
-
+
*out_rec = NULL;
row_build_row_ref_in_tuple(prebuilt->clust_ref, sec_index, rec);
@@ -2239,26 +2343,47 @@ row_sel_get_clust_rec_for_mysql(
clust_rec = btr_pcur_get_rec(prebuilt->clust_pcur);
- if (!page_rec_is_user_rec(clust_rec)) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: error clustered record for sec rec not found\n"
- "InnoDB: index %s table %s\n", sec_index->name,
- sec_index->table->name);
+ /* Note: only if the search ends up on a non-infimum record is the
+ low_match value the real match to the search tuple */
- rec_sprintf(err_buf, 900, rec);
- fprintf(stderr, "InnoDB: sec index record %s\n", err_buf);
+ if (!page_rec_is_user_rec(clust_rec)
+ || btr_pcur_get_low_match(prebuilt->clust_pcur)
+ < dict_index_get_n_unique(clust_index)) {
+
+ /* In a rare case it is possible that no clust rec is found
+ for a delete-marked secondary index record: if in row0umod.c
+ in row_undo_mod_remove_clust_low() we have already removed
+ the clust rec, while purge is still cleaning and removing
+ secondary index records associated with earlier versions of
+ the clustered index record. In that case we know that the
+ clustered index record did not exist in the read view of
+ trx. */
- rec_sprintf(err_buf, 900, clust_rec);
- fprintf(stderr, "InnoDB: clust index record %s\n", err_buf);
+ if (!rec_get_deleted_flag(rec)
+ || prebuilt->select_lock_type != LOCK_NONE) {
- trx = thr_get_trx(thr);
- trx_print(err_buf, trx);
+ ut_print_timestamp(stderr);
+ fprintf(stderr,
+ " InnoDB: error clustered record for sec rec not found\n"
+ "InnoDB: index %s table %s\n", sec_index->name,
+ sec_index->table->name);
- fprintf(stderr,
- "%s\nInnoDB: Make a detailed bug report and send it\n",
- err_buf);
- fprintf(stderr, "InnoDB: to mysql@lists.mysql.com\n");
+ rec_sprintf(err_buf, 900, rec);
+ fprintf(stderr,
+ "InnoDB: sec index record %s\n", err_buf);
+
+ rec_sprintf(err_buf, 900, clust_rec);
+ fprintf(stderr,
+ "InnoDB: clust index record %s\n", err_buf);
+
+ trx = thr_get_trx(thr);
+ trx_print(err_buf, trx);
+
+ fprintf(stderr,
+ "%s\nInnoDB: Make a detailed bug report and send it\n",
+ err_buf);
+ fprintf(stderr, "InnoDB: to mysql@lists.mysql.com\n");
+ }
clust_rec = NULL;
@@ -2936,8 +3061,6 @@ rec_loop:
/*-------------------------------------------------------------*/
/* PHASE 4: Look for matching records in a loop */
- cons_read_requires_clust_rec = FALSE;
-
rec = btr_pcur_get_rec(pcur);
/*
printf("Using index %s cnt %lu ", index->name, cnt);
@@ -3044,7 +3167,7 @@ rec_loop:
if (prebuilt->select_lock_type != LOCK_NONE
&& set_also_gap_locks) {
- /* Try to place a lock on the index record */
+ /* Try to place a lock on the index record */
err = sel_set_rec_lock(rec, index,
prebuilt->select_lock_type,
@@ -3092,6 +3215,8 @@ rec_loop:
/* We are ready to look at a possible new index entry in the result
set: the cursor is now placed on a user record */
+ cons_read_requires_clust_rec = FALSE;
+
if (prebuilt->select_lock_type != LOCK_NONE) {
/* Try to place a lock on the index record; note that delete
marked records are a special case in a unique search. If there
@@ -3117,8 +3242,6 @@ rec_loop:
/* This is a non-locking consistent read: if necessary, fetch
a previous version of the record */
- cons_read_requires_clust_rec = FALSE;
-
if (trx->isolation_level == TRX_ISO_READ_UNCOMMITTED) {
/* Do nothing: we let a non-locking SELECT read the
@@ -3162,7 +3285,7 @@ rec_loop:
if (rec_get_deleted_flag(rec) && !cons_read_requires_clust_rec) {
- /* The record is delete marked: we can skip it if this is
+ /* The record is delete-marked: we can skip it if this is
not a consistent read which might see an earlier version
of a non-clustered index record */
@@ -3275,7 +3398,7 @@ got_row:
goto normal_return;
next_rec:
- /*-------------------------------------------------------------*/
+ /*-------------------------------------------------------------*/
/* PHASE 5: Move the cursor to the next index record */
if (mtr_has_extra_clust_latch) {