5 files changed, 229 insertions, 41 deletions
diff --git a/innobase/row/row0ins.c b/innobase/row/row0ins.c
index 1a7864be5e9..c89f41f69ad 100644
--- a/innobase/row/row0ins.c
+++ b/innobase/row/row0ins.c
@@ -225,11 +225,15 @@ ulint
 row_ins_sec_index_entry_by_modify(
 /*==============================*/
 				/* out: DB_SUCCESS or error code */
+	ulint		mode,	/* in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE,
+				depending on whether mtr holds just a leaf
+				latch or also a tree latch */
 	btr_cur_t*	cursor,	/* in: B-tree cursor */
 	dtuple_t*	entry,	/* in: index entry to insert */
 	que_thr_t*	thr,	/* in: query thread */
 	mtr_t*		mtr)	/* in: mtr */
 {
+	big_rec_t*	dummy_big_rec;
 	mem_heap_t*	heap;
 	upd_t*		update;
 	rec_t*		rec;
@@ -241,16 +245,28 @@ row_ins_sec_index_entry_by_modify(
 	ut_ad(rec_get_deleted_flag(rec));
 	
 	/* We know that in the alphabetical ordering, entry and rec are
-	identical. But in their binary form there may be differences if
+	equal. But in their binary form there may be differences if
 	there are char fields in them. Therefore we have to calculate the
-	difference and do an update-in-place if necessary. */
+	difference. */
 	
 	heap = mem_heap_create(1024);
 	
 	update = row_upd_build_sec_rec_difference_binary(cursor->index,
 							entry, rec, heap); 
+	if (mode == BTR_MODIFY_LEAF) {
+		/* Try an optimistic updating of the record, keeping changes
+		within the page */
 
-	err = btr_cur_update_sec_rec_in_place(cursor, update, thr, mtr);
+		err = btr_cur_optimistic_update(BTR_KEEP_SYS_FLAG, cursor,
+						update, 0, thr, mtr);
+		if (err == DB_OVERFLOW || err == DB_UNDERFLOW) {
+			err = DB_FAIL;
+		}
+	} else  {
+		ut_a(mode == BTR_MODIFY_TREE);
+		err = btr_cur_pessimistic_update(BTR_KEEP_SYS_FLAG, cursor,
+					&dummy_big_rec, update, 0, thr, mtr);
+	}
 
 	mem_heap_free(heap);
 
@@ -1838,7 +1854,8 @@ row_ins_index_entry_low(
 							ext_vec, n_ext_vec,
 							thr, &mtr);
 		} else {
-			err = row_ins_sec_index_entry_by_modify(&cursor, entry,
+			err = row_ins_sec_index_entry_by_modify(mode, &cursor,
+								entry,
 								thr, &mtr);
 		}
 		
diff --git a/innobase/row/row0sel.c b/innobase/row/row0sel.c
index f41574b2855..e6080a5f201 100644
--- a/innobase/row/row0sel.c
+++ b/innobase/row/row0sel.c
@@ -51,7 +51,8 @@ to que_run_threads: this is to allow canceling runaway queries */
 
 /************************************************************************
 Returns TRUE if the user-defined column values in a secondary index record
-are the same as the corresponding columns in the clustered index record.
+are alphabetically the same as the corresponding columns in the clustered
+index record.
 NOTE: the comparison is NOT done as a binary comparison, but character
 fields are compared with collation! */
 static
@@ -96,11 +97,6 @@ row_sel_sec_rec_is_for_clust_rec(
 		       clust_len = ifield->prefix_len;
 		}
 
-                if (sec_len != clust_len) {
-
-                        return(FALSE);
-                }
-
                 if (0 != cmp_data_data(dict_col_get_type(col),
                                         clust_field, clust_len,
                                         sec_field, sec_len)) {
diff --git a/innobase/row/row0umod.c b/innobase/row/row0umod.c
index 0a050e0a1b6..1bfd71f8c64 100644
--- a/innobase/row/row0umod.c
+++ b/innobase/row/row0umod.c
@@ -377,8 +377,14 @@ row_undo_mod_del_mark_or_remove_sec_low(
 }
 
 /***************************************************************
-Delete marks or removes a secondary index entry if found. */
-UNIV_INLINE
+Delete marks or removes a secondary index entry if found.
+NOTE that if we updated the fields of a delete-marked secondary index record
+so that alphabetically they stayed the same, e.g., 'abc' -> 'aBc', we cannot
+return to the original values because we do not know them. But this should
+not cause problems because in row0sel.c, in queries we always retrieve the
+clustered index record or an earlier version of it, if the secondary index
+record through which we do the search is delete-marked. */
+static
 ulint
 row_undo_mod_del_mark_or_remove_sec(
 /*================================*/
@@ -403,20 +409,31 @@ row_undo_mod_del_mark_or_remove_sec(
 }
 
 /***************************************************************
-Delete unmarks a secondary index entry which must be found. */
+Delete unmarks a secondary index entry which must be found. It might not be
+delete-marked at the moment, but it does no harm to unmark it anyway. We also
+need to update the fields of the secondary index record if we updated its
+fields but alphabetically they stayed the same, e.g., 'abc' -> 'aBc'. */
 static
-void
-row_undo_mod_del_unmark_sec(
-/*========================*/
+ulint
+row_undo_mod_del_unmark_sec_and_undo_update(
+/*========================================*/
+				/* out: DB_FAIL or DB_SUCCESS or
+				DB_OUT_OF_FILE_SPACE */
+	ulint		mode,	/* in: search mode: BTR_MODIFY_LEAF or
+				BTR_MODIFY_TREE */
 	undo_node_t*	node,	/* in: row undo node */
 	que_thr_t*	thr,	/* in: query thread */
 	dict_index_t*	index,	/* in: index */
 	dtuple_t*	entry)	/* in: index entry */
 {
+	mem_heap_t*	heap;
 	btr_pcur_t	pcur;
 	btr_cur_t*	btr_cur;
-	ulint		err;
+	upd_t*		update;
+	rec_t*		rec;
+	ulint		err		= DB_SUCCESS;
 	ibool		found;
+	big_rec_t*	dummy_big_rec;
 	mtr_t		mtr;
 	char           	err_buf[1000];
 
@@ -425,8 +442,8 @@ row_undo_mod_del_unmark_sec(
 	log_free_check();
 	mtr_start(&mtr);
 	
-	found = row_search_index_entry(index, entry, BTR_MODIFY_LEAF, &pcur,
-									&mtr);
+	found = row_search_index_entry(index, entry, mode, &pcur, &mtr);
+
 	if (!found) {
 	  	fprintf(stderr,
 			"InnoDB: error in sec index entry del undo in\n"
@@ -443,17 +460,47 @@ row_undo_mod_del_unmark_sec(
 			"%s\nInnoDB: Make a detailed bug report and send it\n",
 			err_buf);
 	  	fprintf(stderr, "InnoDB: to mysql@lists.mysql.com\n");
-
 	} else {
          	btr_cur = btr_pcur_get_btr_cur(&pcur);
 
+		rec = btr_cur_get_rec(btr_cur);
+
 	        err = btr_cur_del_mark_set_sec_rec(BTR_NO_LOCKING_FLAG,
 						btr_cur, FALSE, thr, &mtr);
-	        ut_ad(err == DB_SUCCESS);
+	        ut_a(err == DB_SUCCESS);
+		heap = mem_heap_create(100);
+
+		update = row_upd_build_sec_rec_difference_binary(index, entry,
+								rec, heap);
+	        if (upd_get_n_fields(update) == 0) {
+
+			/* Do nothing */
+		
+		} else if (mode == BTR_MODIFY_LEAF) {
+                	/* Try an optimistic updating of the record, keeping
+			changes within the page */
+
+                	err = btr_cur_optimistic_update(BTR_KEEP_SYS_FLAG
+							| BTR_NO_LOCKING_FLAG,
+ 						btr_cur, update, 0, thr, &mtr);
+                	if (err == DB_OVERFLOW || err == DB_UNDERFLOW) {
+                        	err = DB_FAIL;
+                	}
+       		} else  {
+                	ut_a(mode == BTR_MODIFY_TREE);
+                	err = btr_cur_pessimistic_update(BTR_KEEP_SYS_FLAG
+							| BTR_NO_LOCKING_FLAG,
+						btr_cur, &dummy_big_rec,
+						update, 0, thr, &mtr);
+        	}			
+
+		mem_heap_free(heap);
 	}
 
 	btr_pcur_close(&pcur);
 	mtr_commit(&mtr);
+
+	return(err);
 }
 
 /***************************************************************
@@ -501,13 +548,14 @@ static
 ulint
 row_undo_mod_del_mark_sec(
 /*======================*/
-				/* out: DB_SUCCESS */
+				/* out: DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
 	undo_node_t*	node,	/* in: row undo node */
 	que_thr_t*	thr)	/* in: query thread */
 {
 	mem_heap_t*	heap;
 	dtuple_t*	entry;
 	dict_index_t*	index;
+	ulint		err;
 
 	heap = mem_heap_create(1024);
 
@@ -516,7 +564,21 @@ row_undo_mod_del_mark_sec(
 
 		entry = row_build_index_entry(node->row, index, heap);
 		
-		row_undo_mod_del_unmark_sec(node, thr, index, entry);
+		err = row_undo_mod_del_unmark_sec_and_undo_update(
+						BTR_MODIFY_LEAF,
+						node, thr, index, entry);
+		if (err == DB_FAIL) {
+			err = row_undo_mod_del_unmark_sec_and_undo_update(
+						BTR_MODIFY_TREE,
+						node, thr, index, entry);
+		}
+
+		if (err != DB_SUCCESS) {
+
+			mem_heap_free(heap);
+
+			return(err);
+		}
 
 		node->index = dict_table_get_next_index(node->index);
 	}
@@ -532,7 +594,7 @@ static
 ulint
 row_undo_mod_upd_exist_sec(
 /*=======================*/
-				/* out: DB_SUCCESS or error code */
+				/* out: DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
 	undo_node_t*	node,	/* in: row undo node */
 	que_thr_t*	thr)	/* in: query thread */
 {
@@ -558,22 +620,48 @@ row_undo_mod_upd_exist_sec(
 			/* Build the newest version of the index entry */
 			entry = row_build_index_entry(node->row, index, heap);
 
+			/* NOTE that if we updated the fields of a
+			delete-marked secondary index record so that
+			alphabetically they stayed the same, e.g.,
+			'abc' -> 'aBc', we cannot return to the original
+			values because we do not know them. But this should
+			not cause problems because in row0sel.c, in queries
+			we always retrieve the clustered index record or an
+			earlier version of it, if the secondary index record
+			through which we do the search is delete-marked. */
+
 			err = row_undo_mod_del_mark_or_remove_sec(node, thr,
-							index, entry);
+								index, entry);
 			if (err != DB_SUCCESS) {
 				mem_heap_free(heap);
 
 				return(err);
 			}
-							
+
 			/* We may have to update the delete mark in the
 			secondary index record of the previous version of
-			the row */
+			the row. We also need to update the fields of
+			the secondary index record if we updated its fields
+			but alphabetically they stayed the same, e.g.,
+			'abc' -> 'aBc'. */
 
 			row_upd_index_replace_new_col_vals(entry, index,
 							node->update, NULL);
+			err = row_undo_mod_del_unmark_sec_and_undo_update(
+						BTR_MODIFY_LEAF,
+						node, thr, index, entry);
+			if (err == DB_FAIL) {
+				err =
+				   row_undo_mod_del_unmark_sec_and_undo_update(
+						BTR_MODIFY_TREE,
+						node, thr, index, entry);
+			}
 
-			row_undo_mod_del_unmark_sec(node, thr, index, entry);
+			if (err != DB_SUCCESS) {
+				mem_heap_free(heap);
+
+				return(err);
+			}
 		}
 
 		node->index = dict_table_get_next_index(node->index);
diff --git a/innobase/row/row0upd.c b/innobase/row/row0upd.c
index 606f7404d50..b9e9b8c15c9 100644
--- a/innobase/row/row0upd.c
+++ b/innobase/row/row0upd.c
@@ -361,8 +361,8 @@ row_upd_changes_field_size_or_external(
 				/* out: TRUE if the update changes the size of
 				some field in index or the field is external
 				in rec or update */
-	rec_t*		rec,	/* in: record in clustered index */
-	dict_index_t*	index,	/* in: clustered index */
+	rec_t*		rec,	/* in: record in index */
+	dict_index_t*	index,	/* in: index */
 	upd_t*		update)	/* in: update vector */
 {
 	upd_field_t*	upd_field;
@@ -372,8 +372,6 @@ row_upd_changes_field_size_or_external(
 	ulint		n_fields;
 	ulint		i;
 
-	ut_ad(index->type & DICT_CLUSTERED);
-
 	n_fields = upd_get_n_fields(update);
 
 	for (i = 0; i < n_fields; i++) {
@@ -698,7 +696,7 @@ row_upd_build_sec_rec_difference_binary(
 	ulint		i;
 
 	/* This function is used only for a secondary index */
-	ut_ad(0 == (index->type & DICT_CLUSTERED));
+	ut_a(0 == (index->type & DICT_CLUSTERED));
 
 	update = upd_create(dtuple_get_n_fields(entry), heap);
 
@@ -710,7 +708,13 @@ row_upd_build_sec_rec_difference_binary(
 
 		dfield = dtuple_get_nth_field(entry, i);
 
-		ut_a(len == dfield_get_len(dfield));
+		/* NOTE that it may be that len != dfield_get_len(dfield) if we
+		are updating in a character set and collation where strings of
+		different length can be equal in an alphabetical comparison,
+		and also in the case where we have a column prefix index
+		and the last characters in the index field are spaces; the
+		latter case probably caused the assertion failures reported at
+		row0upd.c line 713 in versions 4.0.14 - 4.0.16. */
 
 		/* NOTE: we compare the fields as binary strings!
 		(No collation) */
@@ -820,12 +824,76 @@ Replaces the new column values stored in the update vector to the index entry
 given. */
 
 void
+row_upd_index_replace_new_col_vals_index_pos(
+/*=========================================*/
+	dtuple_t*	entry,	/* in/out: index entry whose fields are replaced */
+	dict_index_t*	index,	/* in: index; NOTE that this may also be a
+				non-clustered index */
+	upd_t*		update,	/* in: an update vector built for the index so
+				that the field number in an upd_field is the
+				index position */
+	mem_heap_t*	heap)	/* in: memory heap to which we allocate and
+				copy the new values, set this as NULL if you
+				do not want allocation */
+{
+	dict_field_t*	field;
+	upd_field_t*	upd_field;
+	dfield_t*	dfield;
+	dfield_t*	new_val;
+	ulint		j;
+	ulint		i;
+	/* Overwrites entry fields at the index positions named in update */
+	ut_ad(index);
+	/* The entry takes over the info bits of the update vector */
+	dtuple_set_info_bits(entry, update->info_bits);
+	/* Visit every field position j of index and every upd_field i */
+	for (j = 0; j < dict_index_get_n_fields(index); j++) {
+
+	        field = dict_index_get_nth_field(index, j);
+
+		for (i = 0; i < upd_get_n_fields(update); i++) {
+
+		        upd_field = upd_get_nth_field(update, i);
+			/* field_no is compared against the index position j */
+			if (upd_field->field_no == j) {
+
+			        dfield = dtuple_get_nth_field(entry, j);
+				/* Hand the new value and length to dfield */
+				new_val = &(upd_field->new_val);
+				/* Given a heap, the data is copied into it */
+				dfield_set_data(dfield, new_val->data,
+								new_val->len);
+				if (heap && new_val->len != UNIV_SQL_NULL) {
+				        dfield->data = mem_heap_alloc(heap,
+								new_val->len);
+					ut_memcpy(dfield->data, new_val->data,
+								new_val->len);
+				}
+				/* Cut the length down to a column prefix */
+				if (field->prefix_len > 0
+			            && new_val->len != UNIV_SQL_NULL
+			            && new_val->len > field->prefix_len) {
+
+				        dfield->len = field->prefix_len;
+				}
+			}
+		}
+	}
+}
+
+/***************************************************************
+Replaces the new column values stored in the update vector to the index entry
+given. */
+
+void
 row_upd_index_replace_new_col_vals(
 /*===============================*/
 	dtuple_t*	entry,	/* in/out: index entry where replaced */
 	dict_index_t*	index,	/* in: index; NOTE that this may also be a
 				non-clustered index */
-	upd_t*		update,	/* in: update vector */
+	upd_t*		update,	/* in: an update vector built for the
+				CLUSTERED index so that the field number in
+				an upd_field is the clustered index position */
 	mem_heap_t*	heap)	/* in: memory heap to which we allocate and
 				copy the new values, set this as NULL if you
 				do not want allocation */
@@ -893,7 +961,9 @@ row_upd_changes_ord_field_binary(
 				known when this function is called, e.g., at
 				compile time */
 	dict_index_t*	index,	/* in: index of the record */
-	upd_t*		update)	/* in: update vector for the row */
+	upd_t*		update)	/* in: update vector for the row; NOTE: the
+				field numbers in this MUST be clustered index
+				positions! */
 {
 	upd_field_t*	upd_field;
 	dict_field_t*	ind_field;
diff --git a/innobase/row/row0vers.c b/innobase/row/row0vers.c
index d4a463d8a96..fca56389e45 100644
--- a/innobase/row/row0vers.c
+++ b/innobase/row/row0vers.c
@@ -117,9 +117,10 @@ row_vers_impl_x_locked_off_kernel(
 		return(NULL);
 	}
 
-	/* We look up if some earlier version of the clustered index record
-	would require rec to be in a different state (delete marked or
-	unmarked, or not existing). If there is such a version, then rec was
+	/* We check whether some earlier version of the clustered index
+	record, one modified by the trx_id transaction, would require rec to be
+	in a different state (delete marked or unmarked, or with different
+	field values, or not existing). If such a version exists, then rec was
 	modified by the trx_id transaction, and it has an implicit x-lock on
 	rec. Note that if clust_rec itself would require rec to be in a
 	different state, then the trx_id transaction has not yet had time to
@@ -188,6 +189,8 @@ row_vers_impl_x_locked_off_kernel(
 
 		vers_del = rec_get_deleted_flag(prev_version);
 
+		/* We check if entry and rec are equal in the alphabetical
+		ordering */
 		if (0 == cmp_dtuple_rec(entry, rec)) {
 			/* The delete marks of rec and prev_version should be
 			equal for rec to be in the state required by
@@ -198,6 +201,20 @@ row_vers_impl_x_locked_off_kernel(
 
 				break;
 			}
+
+			/* It is possible that the row was updated so that the
+			secondary index record remained the same in
+			alphabetical ordering, but the field values still
+			changed, e.g., 'abc' -> 'ABC'. Check that too. */
+
+			dtuple_set_types_binary(entry,
+						dtuple_get_n_fields(entry));
+			if (0 != cmp_dtuple_rec(entry, rec)) {
+
+				trx = trx_get_on_id(trx_id);
+
+				break;
+			}
 		} else if (!rec_del) {
 			/* The delete mark should be set in rec for it to be
 			in the state required by prev_version */
@@ -256,8 +273,8 @@ row_vers_must_preserve_del_marked(
 Finds out if a version of the record, where the version >= the current
 purge view, should have ientry as its secondary index entry. We check
 if there is any not delete marked version of the record where the trx
-id >= purge view, and the secondary index entry == ientry; exactly in
-this case we return TRUE. */
+id >= purge view, and the secondary index entry and ientry are equal in
+the alphabetical ordering; exactly in this case we return TRUE. */
 
 ibool
 row_vers_old_has_index_entry(