Merge hundin:/my/mysql-4.0 into bitch.mysql.fi:/my/mysql-4.0

innobase/pars/pars0opt.c: Auto merged mysql-test/mysql-test-run.sh: Auto merged
author: unknown <monty@bitch.mysql.fi> 2002-06-03 14:43:44 +0300
committer: unknown <monty@bitch.mysql.fi> 2002-06-03 14:43:44 +0300
commit: dddd9084a0ca7ce1e9e60a65ddb3fb8087912249 (patch)
tree: 635cb4a7da4d21106255bd614f818e63c5905ede /innobase
parent: c8ca330db07a4739b58800ccc75c097a157417a6 (diff)
parent: 7daf5a5d0ee7b52508943c7095a2cc150abcf616 (diff)
download: mariadb-git-dddd9084a0ca7ce1e9e60a65ddb3fb8087912249.tar.gz
59 files changed, 3229 insertions, 540 deletions
diff --git a/innobase/btr/btr0btr.c b/innobase/btr/btr0btr.c
index c71ef017aa8..0263996a429 100644
--- a/innobase/btr/btr0btr.c
+++ b/innobase/btr/btr0btr.c
@@ -570,6 +570,19 @@ btr_page_get_father_for_rec(
 
 	node_ptr = btr_cur_get_rec(&cursor);
 
+	if (btr_node_ptr_get_child_page_no(node_ptr) !=
+                                                buf_frame_get_page_no(page)) {
+      		fprintf(stderr,
+"InnoDB: Corruption of an index tree: table %s, index %s,\n"
+"InnoDB: father ptr page no %lu, child page no %lu\n",
+                    (UT_LIST_GET_FIRST(tree->tree_indexes))->table_name,
+                    (UT_LIST_GET_FIRST(tree->tree_indexes))->name,
+                    btr_node_ptr_get_child_page_no(node_ptr),
+                    buf_frame_get_page_no(page));
+     		page_rec_print(page_rec_get_next(page_get_infimum_rec(page)));
+     		page_rec_print(node_ptr);
+	}
+
 	ut_a(btr_node_ptr_get_child_page_no(node_ptr) ==
 						buf_frame_get_page_no(page));
 	mem_heap_free(heap);
diff --git a/innobase/btr/btr0cur.c b/innobase/btr/btr0cur.c
index d6944ecadc8..1274719cf7d 100644
--- a/innobase/btr/btr0cur.c
+++ b/innobase/btr/btr0cur.c
@@ -204,7 +204,7 @@ btr_cur_search_to_nth_level(
 				the caller uses his search latch
 				to protect the record! */
 	btr_cur_t*	cursor, /* in/out: tree cursor; the cursor page is
-				   s- or x-latched, but see also above! */
+				s- or x-latched, but see also above! */
 	ulint		has_search_latch,/* in: info on the latch mode the
 				caller currently has on btr_search_latch:
 				RW_S_LATCH, or 0 */
@@ -228,6 +228,7 @@ btr_cur_search_to_nth_level(
 	ulint		insert_planned;
 	ulint		buf_mode;
 	ulint		estimate;
+	ulint		ignore_sec_unique;
 	ulint		root_height = 0; /* remove warning */
 #ifdef BTR_CUR_ADAPT
 	btr_search_t*	info;
@@ -246,7 +247,9 @@ btr_cur_search_to_nth_level(
 #endif	
 	insert_planned = latch_mode & BTR_INSERT;
 	estimate = latch_mode & BTR_ESTIMATE;
-	latch_mode = latch_mode & ~(BTR_INSERT | BTR_ESTIMATE);
+	ignore_sec_unique = latch_mode & BTR_IGNORE_SEC_UNIQUE;
+	latch_mode = latch_mode & ~(BTR_INSERT | BTR_ESTIMATE
+					| BTR_IGNORE_SEC_UNIQUE);
 
 	ut_ad(!insert_planned || (mode == PAGE_CUR_LE));
 	
@@ -343,7 +346,8 @@ btr_cur_search_to_nth_level(
 
 			rw_latch = latch_mode;
 
-			if (insert_planned && ibuf_should_try(index)) {
+			if (insert_planned && ibuf_should_try(index,
+							ignore_sec_unique)) {
 				
 				/* Try insert to the insert buffer if the
 				page is not in the buffer pool */
@@ -356,7 +360,6 @@ retry_page_get:
 					buf_mode,
 					IB__FILE__, __LINE__,
 					mtr);
-
 		if (page == NULL) {
 			/* This must be a search to perform an insert;
 			try insert to the insert buffer */
@@ -365,7 +368,7 @@ retry_page_get:
 			ut_ad(insert_planned);
 			ut_ad(cursor->thr);
 
-			if (ibuf_should_try(index) &&
+			if (ibuf_should_try(index, ignore_sec_unique) &&
 				ibuf_insert(tuple, index, space, page_no,
 							cursor->thr)) {
 				/* Insertion to the insert buffer succeeded */
diff --git a/innobase/btr/btr0sea.c b/innobase/btr/btr0sea.c
index 21b4c12ab56..8a54d9de9c0 100644
--- a/innobase/btr/btr0sea.c
+++ b/innobase/btr/btr0sea.c
@@ -743,7 +743,7 @@ btr_search_guess_on_hash(
 	
 #ifdef notdefined
 	/* These lines of code can be used in a debug version to check
-	correctness of the searched cursor position: */
+	the correctness of the searched cursor position: */
 	
 	info->last_hash_succ = FALSE;
 
diff --git a/innobase/buf/buf0buf.c b/innobase/buf/buf0buf.c
index adb61bfbb56..e840e9f143d 100644
--- a/innobase/buf/buf0buf.c
+++ b/innobase/buf/buf0buf.c
@@ -220,6 +220,10 @@ buf_calc_page_checksum(
 {
   	ulint checksum;
 
+	/* Since the fields FIL_PAGE_FILE_FLUSH_LSN and ..._ARCH_LOG_NO
+	are written outside the buffer pool to the first pages of data
+	files, we have to skip them in page checksum calculation */
+  	
   	checksum = ut_fold_binary(page, FIL_PAGE_FILE_FLUSH_LSN);
   		+ ut_fold_binary(page + FIL_PAGE_DATA,
 				UNIV_PAGE_SIZE - FIL_PAGE_DATA
@@ -279,8 +283,9 @@ buf_page_print(
 
 	ut_sprintf_buf(buf, read_buf, UNIV_PAGE_SIZE);
 
+	ut_print_timestamp(stderr);
 	fprintf(stderr,
-	"InnoDB: Page dump in ascii and hex (%u bytes):\n%s",
+	"  InnoDB: Page dump in ascii and hex (%u bytes):\n%s",
 					UNIV_PAGE_SIZE, buf);
 	fprintf(stderr, "InnoDB: End of page dump\n");
 
@@ -288,7 +293,8 @@ buf_page_print(
 
 	checksum = buf_calc_page_checksum(read_buf);
 
-	fprintf(stderr, "InnoDB: Page checksum %lu stored checksum %lu\n",
+	ut_print_timestamp(stderr);
+	fprintf(stderr, "  InnoDB: Page checksum %lu stored checksum %lu\n",
 			checksum, mach_read_from_4(read_buf
                                         + UNIV_PAGE_SIZE
 					- FIL_PAGE_END_LSN)); 
@@ -1358,47 +1364,87 @@ buf_page_io_complete(
 /*=================*/
 	buf_block_t*	block)	/* in: pointer to the block in question */
 {
-	dulint		id;
 	dict_index_t*	index;
+	dulint		id;
 	ulint		io_type;
-
+	ulint		read_page_no;
+	
 	ut_ad(block);
 
 	io_type = block->io_fix;
 
 	if (io_type == BUF_IO_READ) {
+		/* If this page is not uninitialized and not in the
+		doublewrite buffer, then the page number should be the
+		same as in block */
+
+		read_page_no = mach_read_from_4((block->frame)
+						+ FIL_PAGE_OFFSET);
+		if (read_page_no != 0
+			&& !trx_doublewrite_page_inside(read_page_no)
+	    		&& read_page_no != block->offset) {
+
+			fprintf(stderr,
+"InnoDB: Error: page n:o stored in the page read in is %lu, should be %lu!\n",
+				read_page_no, block->offset);
+		}
+#ifdef notdefined
+		if (block->offset != 0 && read_page_no == 0) {
+			/* Check that the page is really uninited */
+
+			for (i = 0; i < UNIV_PAGE_SIZE; i++) {
+
+				if (*((block->frame) + i) != '\0') {
+					fprintf(stderr,
+"InnoDB: Error: page n:o in the page read in is 0, but page %lu is inited!\n",
+						block->offset);
+					break;
+				}
+			}
+		}
+#endif
 		/* From version 3.23.38 up we store the page checksum
-		   to the 4 upper bytes of the page end lsn field */
+		   to the 4 first bytes of the page end lsn field */
 
 		if (buf_page_is_corrupted(block->frame)) {
 		  	fprintf(stderr,
-			  "InnoDB: Database page corruption or a failed\n"
-			  "InnoDB: file read of page %lu.\n", block->offset);
+		"InnoDB: Database page corruption on disk or a failed\n"
+		"InnoDB: file read of page %lu.\n", block->offset);
 			  
 		  	fprintf(stderr,
-			  "InnoDB: You may have to recover from a backup.\n");
+		"InnoDB: You may have to recover from a backup.\n");
 
 			buf_page_print(block->frame);
 
 		  	fprintf(stderr,
-			  "InnoDB: Database page corruption or a failed\n"
-			  "InnoDB: file read of page %lu.\n", block->offset);
+		"InnoDB: Database page corruption on disk or a failed\n"
+		"InnoDB: file read of page %lu.\n", block->offset);
 		  	fprintf(stderr,
-			  "InnoDB: You may have to recover from a backup.\n");
+		"InnoDB: You may have to recover from a backup.\n");
 			fprintf(stderr,
-			  "InnoDB: It is also possible that your operating\n"
-			  "InnoDB: system has corrupted its own file cache\n"
-			  "InnoDB: and rebooting your computer removes the\n"
-			  "InnoDB: error.\n");
+		"InnoDB: It is also possible that your operating\n"
+		"InnoDB: system has corrupted its own file cache\n"
+		"InnoDB: and rebooting your computer removes the\n"
+		"InnoDB: error.\n"
+		"InnoDB: If the corrupt page is an index page\n"
+		"InnoDB: you can also try to fix the corruption\n"
+		"InnoDB: by dumping, dropping, and reimporting\n"
+		"InnoDB: the corrupt table. You can use CHECK\n"
+		"InnoDB: TABLE to scan your table for corruption.\n"
+		"InnoDB: Look also at section 6.1 of\n"
+		"InnoDB: http://www.innodb.com/ibman.html about\n"
+		"InnoDB: forcing recovery.\n");
 			  
-			if (srv_force_recovery < SRV_FORCE_IGNORE_CORRUPT) { 
+			if (srv_force_recovery < SRV_FORCE_IGNORE_CORRUPT) {
+				fprintf(stderr,
+	"InnoDB: Ending processing because of a corrupt database page.\n");
 		  		exit(1);
 		  	}
 		}
 
 		if (recv_recovery_is_on()) {
-			recv_recover_page(TRUE, block->frame, block->space,
-								block->offset);
+			recv_recover_page(FALSE, TRUE, block->frame,
+						block->space, block->offset);
 		}
 
 		if (!recv_no_ibuf_operations) {
diff --git a/innobase/buf/buf0flu.c b/innobase/buf/buf0flu.c
index 8184f10d6e9..4c6850af078 100644
--- a/innobase/buf/buf0flu.c
+++ b/innobase/buf/buf0flu.c
@@ -328,6 +328,34 @@ try_again:
 }
 
 /************************************************************************
+Initializes a page for writing to the tablespace. */
+
+void
+buf_flush_init_for_writing(
+/*=======================*/
+	byte*	page,		/* in: page frame to stamp before the write */
+	dulint	newest_lsn,	/* in: newest modification lsn to the page */
+	ulint	space,		/* in: space id */
+	ulint	page_no)	/* in: page number */
+{
+	byte*	end_lsn_field;	/* start of the end lsn field of the page */
+
+	end_lsn_field = page + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN;
+
+	/* Stamp the newest modification lsn to both lsn fields of the page */
+	mach_write_to_8(page + FIL_PAGE_LSN, newest_lsn);
+	mach_write_to_8(end_lsn_field, newest_lsn);
+
+	/* Record which page this is: the space id and the page number */
+	mach_write_to_4(page + FIL_PAGE_SPACE, space);
+	mach_write_to_4(page + FIL_PAGE_OFFSET, page_no);
+
+	/* The checksum is calculated last, when the fields above are in
+	place; it overwrites the first 4 bytes of the end lsn field */
+	mach_write_to_4(end_lsn_field, buf_calc_page_checksum(page));
+}
+
+/************************************************************************
 Does an asynchronous write of a buffer page. NOTE: in simulated aio and
 also when the doublewrite buffer is used, we must call
 buf_flush_buffered_writes after we have posted a batch of writes! */
@@ -349,23 +377,8 @@ buf_flush_write_block_low(
 	/* Force the log to the disk before writing the modified block */
 	log_flush_up_to(block->newest_modification, LOG_WAIT_ALL_GROUPS);
 #endif	
-	/* Write the newest modification lsn to the page */
-	mach_write_to_8(block->frame + FIL_PAGE_LSN,
-						block->newest_modification);
-	mach_write_to_8(block->frame + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN,
-						block->newest_modification);
-
-	/* Write to the page the space id and page number */
-
-	mach_write_to_4(block->frame + FIL_PAGE_SPACE, block->space);
-	mach_write_to_4(block->frame + FIL_PAGE_OFFSET, block->offset);
-
-	/* We overwrite the first 4 bytes of the end lsn field to store
-	a page checksum */
-
-	mach_write_to_4(block->frame + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN,
-			buf_calc_page_checksum(block->frame));
-
+	buf_flush_init_for_writing(block->frame, block->newest_modification,
+						block->space, block->offset);
 	if (!trx_doublewrite) {
 		fil_io(OS_FILE_WRITE | OS_AIO_SIMULATED_WAKE_LATER,
 			FALSE, block->space, block->offset, 0, UNIV_PAGE_SIZE,
diff --git a/innobase/dict/dict0crea.c b/innobase/dict/dict0crea.c
index 69b1e7c61fd..0da59d39646 100644
--- a/innobase/dict/dict0crea.c
+++ b/innobase/dict/dict0crea.c
@@ -1201,7 +1201,8 @@ loop:
 					ut_dulint_get_low(id),
 					table->name,
 					foreign->referenced_table_name,
-					foreign->n_fields);
+					foreign->n_fields
+					+ (foreign->type << 24));
 
 	for (i = 0; i < foreign->n_fields; i++) {
 
diff --git a/innobase/dict/dict0dict.c b/innobase/dict/dict0dict.c
index 91bc770320f..5f6d947bb92 100644
--- a/innobase/dict/dict0dict.c
+++ b/innobase/dict/dict0dict.c
@@ -281,7 +281,8 @@ dict_table_autoinc_initialize(
 }
 
 /************************************************************************
-Gets the next autoinc value, 0 if not yet initialized. */
+Gets the next autoinc value, 0 if not yet initialized. If initialized,
+increments the counter by 1. */
 
 ib_longlong
 dict_table_autoinc_get(
@@ -307,6 +308,32 @@ dict_table_autoinc_get(
 }
 
 /************************************************************************
+Reads the autoinc counter value, 0 if not yet initialized. Does not
+increment the counter. */
+
+ib_longlong
+dict_table_autoinc_read(
+/*====================*/
+				/* out: current value of the counter, or 0
+				if the counter has not been initialized */
+	dict_table_t*	table)	/* in: table */
+{
+	ib_longlong	counter	= 0;
+
+	/* Both the inited flag and the counter are read while holding
+	the autoinc mutex of the table; the counter is not changed */
+	mutex_enter(&(table->autoinc_mutex));
+
+	if (table->autoinc_inited) {
+		counter = table->autoinc;
+	}
+
+	mutex_exit(&(table->autoinc_mutex));
+
+	return(counter);
+}
+
+/************************************************************************
 Updates the autoinc counter if the value supplied is bigger than the
 current value. If not inited, does nothing. */
 
@@ -648,7 +675,10 @@ dict_table_rename_in_cache(
 /*=======================*/
 					/* out: TRUE if success */
 	dict_table_t*	table,		/* in: table */
-	char*		new_name)	/* in: new name */
+	char*		new_name,	/* in: new name */
+	ibool		rename_also_foreigns)/* in: in ALTER TABLE we want
+					to preserve the original table name
+					in constraints which reference it */
 {
 	dict_foreign_t*	foreign;
 	dict_index_t*	index;
@@ -706,6 +736,41 @@ dict_table_rename_in_cache(
 		index = dict_table_get_next_index(index);
 	}
 
+	if (!rename_also_foreigns) {
+		/* In ALTER TABLE we think of the rename table operation
+		in the direction table -> temporary table (#sql...)
+		as dropping the table with the old name and creating
+		a new with the new name. Thus we kind of drop the
+		constraints from the dictionary cache here. The foreign key
+		constraints will be inherited to the new table from the
+		system tables through a call of dict_load_foreigns. */
+	
+		/* Remove the foreign constraints from the cache */
+		foreign = UT_LIST_GET_LAST(table->foreign_list);
+
+		while (foreign != NULL) {
+			dict_foreign_remove_from_cache(foreign);
+			foreign = UT_LIST_GET_LAST(table->foreign_list);
+		}
+
+		/* Reset table field in referencing constraints */
+
+		foreign = UT_LIST_GET_FIRST(table->referenced_list);
+
+		while (foreign != NULL) {
+			foreign->referenced_table = NULL;
+			foreign->referenced_index = NULL;
+		
+			foreign = UT_LIST_GET_NEXT(referenced_list, foreign);
+		}
+
+		/* Make the list of referencing constraints empty */
+
+		UT_LIST_INIT(table->referenced_list);
+		
+		return(TRUE);
+	}
+
 	/* Update the table name fields in foreign constraints */
 
 	foreign = UT_LIST_GET_FIRST(table->foreign_list);
@@ -772,8 +837,6 @@ dict_table_remove_from_cache(
 	foreign = UT_LIST_GET_LAST(table->foreign_list);
 
 	while (foreign != NULL) {
-		ut_a(0 == ut_strcmp(foreign->foreign_table_name, table->name));
-
 		dict_foreign_remove_from_cache(foreign);
 		foreign = UT_LIST_GET_LAST(table->foreign_list);
 	}
@@ -783,8 +846,6 @@ dict_table_remove_from_cache(
 	foreign = UT_LIST_GET_FIRST(table->referenced_list);
 
 	while (foreign != NULL) {
-		ut_a(0 == ut_strcmp(foreign->referenced_table_name,
-								table->name));
 		foreign->referenced_table = NULL;
 		foreign->referenced_index = NULL;
 		
@@ -1591,7 +1652,7 @@ dict_foreign_find_index(
 							->col->name;
 				if (ut_strlen(columns[i]) !=
 						ut_strlen(col_name)
-				    || 0 != ut_memcmp(columns[i],
+				    || 0 != ut_cmp_in_lower_case(columns[i],
 				    			col_name,
 				  			ut_strlen(col_name))) {
 				  	break;
@@ -1632,8 +1693,9 @@ dict_foreign_add_to_cache(
 {
 	dict_table_t*	for_table;
 	dict_table_t*	ref_table;
-	dict_foreign_t*	for_in_cache	= NULL;
+	dict_foreign_t*	for_in_cache			= NULL;
 	dict_index_t*	index;
+	ibool		added_to_referenced_list	= FALSE;
 
 	ut_ad(mutex_own(&(dict_sys->mutex)));
 
@@ -1677,6 +1739,7 @@ dict_foreign_add_to_cache(
 		UT_LIST_ADD_LAST(referenced_list,
 					ref_table->referenced_list,
 					for_in_cache);
+		added_to_referenced_list = TRUE;
 	}
 
 	if (for_in_cache->foreign_table == NULL && for_table) {
@@ -1687,6 +1750,12 @@ dict_foreign_add_to_cache(
 
 		if (index == NULL) {
 			if (for_in_cache == foreign) {
+				if (added_to_referenced_list) {
+					UT_LIST_REMOVE(referenced_list,
+						ref_table->referenced_list,
+						for_in_cache);
+				}
+			
 				mem_heap_free(foreign->heap);
 			}
 
@@ -1788,8 +1857,9 @@ dict_scan_col(
 	ibool*		success,/* out: TRUE if success */
 	dict_table_t*	table,	/* in: table in which the column is */
 	dict_col_t**	column,	/* out: pointer to column if success */
-	char**		column_name)/* out: pointer to column->name if
+	char**		column_name,/* out: pointer to column->name if
 				success */
+	ulint*		column_name_len)/* out: column name length */
 {
 	dict_col_t*	col;
 	char*		old_ptr;
@@ -1806,29 +1876,46 @@ dict_scan_col(
 		return(ptr);
 	}
 
+	if (*ptr == '`') {
+		ptr++;
+	}
+
 	old_ptr = ptr;
 	
-	while (!isspace(*ptr) && *ptr != ',' && *ptr != ')') {
+	while (!isspace(*ptr) && *ptr != ',' && *ptr != ')' && 	*ptr != '`') {
+
 		ptr++;
 	}
 
-	for (i = 0; i < dict_table_get_n_cols(table); i++) {
+	*column_name_len = (ulint)(ptr - old_ptr);
+	
+	if (table == NULL) {
+		*success = TRUE;
+		*column = NULL;
+		*column_name = old_ptr;
+	} else {
+	    	for (i = 0; i < dict_table_get_n_cols(table); i++) {
 
-		col = dict_table_get_nth_col(table, i);
+			col = dict_table_get_nth_col(table, i);
 
-		if (ut_strlen(col->name) == (ulint)(ptr - old_ptr)
-		    && 0 == ut_cmp_in_lower_case(col->name, old_ptr,
+			if (ut_strlen(col->name) == (ulint)(ptr - old_ptr)
+			    && 0 == ut_cmp_in_lower_case(col->name, old_ptr,
 						(ulint)(ptr - old_ptr))) {
-		    	/* Found */
+		    		/* Found */
 
-		    	*success = TRUE;
-		    	*column = col;
-		    	*column_name = col->name;
+		    		*success = TRUE;
+		    		*column = col;
+		    		*column_name = col->name;
 
-		    	break;
+		    		break;
+			}
 		}
 	}
 	
+	if (*ptr == '`') {
+		ptr++;
+	}
+
 	return(ptr);
 }
 
@@ -1840,14 +1927,18 @@ dict_scan_table_name(
 /*=================*/
 				/* out: scanned to */
 	char*		ptr,	/* in: scanned to */
-	dict_table_t**	table,	/* out: table object or NULL if error */
-	char*		name)	/* in: foreign key table name */
+	dict_table_t**	table,	/* out: table object or NULL */
+	char*		name,	/* in: foreign key table name */
+	ibool*		success,/* out: TRUE if ok name found */
+	char*		second_table_name)/* in/out: buffer where to store
+				the referenced table name; must be at least
+				2500 bytes */
 {
 	char*	dot_ptr			= NULL;
 	char*	old_ptr;
 	ulint	i;
-	char	second_table_name[10000];
 	
+	*success = FALSE;
 	*table = NULL;
 
 	while (isspace(*ptr)) {
@@ -1859,9 +1950,13 @@ dict_scan_table_name(
 		return(ptr);
 	}
 
+	if (*ptr == '`') {
+		ptr++;
+	}
+
 	old_ptr = ptr;
 	
-	while (!isspace(*ptr) && *ptr != '(') {
+	while (!isspace(*ptr) && *ptr != '(' && *ptr != '`') {
 		if (*ptr == '.') {
 			dot_ptr = ptr;
 		}
@@ -1869,7 +1964,7 @@ dict_scan_table_name(
 		ptr++;
 	}
 
-	if (ptr - old_ptr > 9000) {
+	if (ptr - old_ptr > 2000) {
 		return(old_ptr);
 	}
 	
@@ -1900,8 +1995,14 @@ dict_scan_table_name(
 		second_table_name[ptr - old_ptr] = '\0';
 	}
 
+	*success = TRUE;
+
 	*table = dict_table_get_low(second_table_name);
 
+	if (*ptr == '`') {
+		ptr++;
+	}
+
 	return(ptr);
 }
 
@@ -1944,8 +2045,8 @@ dict_create_foreign_constraints(
 /*============================*/
 				/* out: error code or DB_SUCCESS */
 	trx_t*	trx,		/* in: transaction */
-	char*	sql_string,	/* in: table create statement where
-				foreign keys are declared like:
+	char*	sql_string,	/* in: table create or ALTER TABLE
+				statement where foreign keys are declared like:
 				FOREIGN KEY (a, b) REFERENCES table2(c, d),
 				table2 can be written also with the database
 				name before it: test.table2; the default
@@ -1961,9 +2062,12 @@ dict_create_foreign_constraints(
 	ibool		success;
 	ulint		error;
 	ulint		i;
-	dict_col_t*	columns[1000];
-	char*		column_names[1000];
-
+	ulint		j;
+	dict_col_t*	columns[500];
+	char*		column_names[500];
+	ulint		column_name_lens[500];
+	char		referenced_table_name[2500];
+	
 	ut_ad(mutex_own(&(dict_sys->mutex)));
 
 	table = dict_table_get_low(name);
@@ -1971,10 +2075,11 @@ dict_create_foreign_constraints(
 	if (table == NULL) {
 		return(DB_ERROR);
 	}
+
 loop:
 	ptr = dict_scan_to(ptr, (char *) "FOREIGN");
 
-	if (*ptr == '\0' || dict_bracket_count(sql_string, ptr) != 1) {
+	if (*ptr == '\0') {
 
 		/* The following call adds the foreign key constraints
 		to the data dictionary system tables on disk */
@@ -2007,7 +2112,7 @@ loop:
 	/* Scan the columns in the first list */
 col_loop1:
 	ptr = dict_scan_col(ptr, &success, table, columns + i,
-							column_names + i);
+				column_names + i, column_name_lens + i);
 	if (!success) {
 		return(DB_CANNOT_ADD_CONSTRAINT);
 	}
@@ -2058,9 +2163,13 @@ col_loop1:
 					1 + ut_strlen(columns[i]->name));
 	}
 	
-	ptr = dict_scan_table_name(ptr, &referenced_table, name);
+	ptr = dict_scan_table_name(ptr, &referenced_table, name,
+					&success, referenced_table_name);
+
+	/* Note that referenced_table can be NULL if the user has suppressed
+	checking of foreign key constraints! */
 
-	if (!referenced_table) {
+	if (!success || (!referenced_table && trx->check_foreigns)) {
 		dict_foreign_free(foreign);
 
 		return(DB_CANNOT_ADD_CONSTRAINT);
@@ -2078,7 +2187,7 @@ col_loop1:
 
 col_loop2:
 	ptr = dict_scan_col(ptr, &success, referenced_table, columns + i,
-							column_names + i);
+				column_names + i, column_name_lens + i);
 	i++;
 	
 	if (!success) {
@@ -2100,43 +2209,104 @@ col_loop2:
 		return(DB_CANNOT_ADD_CONSTRAINT);
 	}
 
+	ptr = dict_accept(ptr, "ON", &success);
+
+	if (!success) {
+
+		goto try_find_index;
+	}
+
+	ptr = dict_accept(ptr, "DELETE", &success);
+
+	if (!success) {
+
+		goto try_find_index;
+	}
+
+	ptr = dict_accept(ptr, "CASCADE", &success);
+
+	if (success) {
+
+		foreign->type = DICT_FOREIGN_ON_DELETE_CASCADE;
+
+		goto try_find_index;
+	}
+
+	ptr = dict_accept(ptr, "SET", &success);
+
+	if (!success) {
+
+		goto try_find_index;
+	}
+
+	ptr = dict_accept(ptr, "NULL", &success);
+
+	if (success) {
+		for (j = 0; j < foreign->n_fields; j++) {
+			if ((dict_index_get_nth_type(
+				foreign->foreign_index, j)->prtype)
+				& DATA_NOT_NULL) {
+
+				/* It is not sensible to define SET NULL
+				if the column is not allowed to be NULL! */
+
+				dict_foreign_free(foreign);
+				return(DB_CANNOT_ADD_CONSTRAINT);
+			}
+		}
+
+		foreign->type = DICT_FOREIGN_ON_DELETE_SET_NULL;
+
+		goto try_find_index;
+	}
+	
+try_find_index:
 	/* Try to find an index which contains the columns as the first fields
 	and in the right order, and the types are the same as in
 	foreign->foreign_index */
 
-	index = dict_foreign_find_index(referenced_table, column_names, i,
+	if (referenced_table) {
+		index = dict_foreign_find_index(referenced_table,
+						column_names, i,
 						foreign->foreign_index);
-
-	if (!index) {
-		dict_foreign_free(foreign);
-		return(DB_CANNOT_ADD_CONSTRAINT);
+		if (!index) {
+			dict_foreign_free(foreign);
+			return(DB_CANNOT_ADD_CONSTRAINT);
+		}
+	} else {
+		ut_a(trx->check_foreigns == FALSE);
+		index = NULL;
 	}
 
 	foreign->referenced_index = index;
 	foreign->referenced_table = referenced_table;
 
 	foreign->referenced_table_name = mem_heap_alloc(foreign->heap,
-					1 + ut_strlen(referenced_table->name));
+					1 + ut_strlen(referenced_table_name));
 
-	ut_memcpy(foreign->referenced_table_name, referenced_table->name,
-					1 + ut_strlen(referenced_table->name));
+	ut_memcpy(foreign->referenced_table_name, referenced_table_name,
+					1 + ut_strlen(referenced_table_name));
 					
 	foreign->referenced_col_names = mem_heap_alloc(foreign->heap,
 							i * sizeof(void*));
 	for (i = 0; i < foreign->n_fields; i++) {
 		foreign->referenced_col_names[i]
 				= mem_heap_alloc(foreign->heap,
-					1 + ut_strlen(columns[i]->name));
-		ut_memcpy(
-			foreign->referenced_col_names[i], columns[i]->name,
-					1 + ut_strlen(columns[i]->name));
+					1 + column_name_lens[i]);
+		ut_memcpy(foreign->referenced_col_names[i], column_names[i],
+					column_name_lens[i]);
+		(foreign->referenced_col_names[i])[column_name_lens[i]] = '\0';
 	}
 
 	/* We found an ok constraint definition: add to the lists */
 	
 	UT_LIST_ADD_LAST(foreign_list, table->foreign_list, foreign);
-	UT_LIST_ADD_LAST(referenced_list, referenced_table->referenced_list,
+
+	if (referenced_table) {
+		UT_LIST_ADD_LAST(referenced_list,
+					referenced_table->referenced_list,
 								foreign);
+	}
 	goto loop;
 }
 
@@ -2889,11 +3059,96 @@ dict_field_print_low(
 }
 
 /**************************************************************************
+Sprintfs to a string info on foreign keys of a table in a format suitable
+for CREATE TABLE. */
+static
+void
+dict_print_info_on_foreign_keys_in_create_format(
+/*=============================================*/
+	char*		buf,	/* in: auxiliary buffer of 10000 chars */
+	char*		str,	/* in/out: pointer to a string; not touched
+				if the table has no foreign keys or if
+				len is 0 */
+	ulint		len,	/* in: space in str available for info */
+	dict_table_t*	table)	/* in: table */
+{
+	dict_foreign_t*	foreign;
+	ulint		buf_size	= 10000; /* see the comment on buf */
+	ulint		needed;
+	ulint		used;
+	ulint		i;
+	char*		buf2;
+	char*		name;
+	char*		ptr;
+
+	if (len == 0) {
+		/* There is no room even for the terminating NUL */
+
+		return;
+	}
+
+	buf2 = buf;
+	*buf2 = '\0';
+
+	mutex_enter(&(dict_sys->mutex));
+
+	foreign = UT_LIST_GET_FIRST(table->foreign_list);
+
+	if (foreign == NULL) {
+		mutex_exit(&(dict_sys->mutex));
+
+		return;
+	}
+
+	while (foreign != NULL) {
+		/* sprintf does no bounds checking, so calculate first an
+		upper bound for the space this constraint takes: 64 bytes
+		cover the fixed text (at most 55 bytes with the terminating
+		NUL), and every column pair adds its two names, 4 quotes
+		and at most 2 separators ", " */
+
+		needed = 64 + ut_strlen(foreign->referenced_table_name);
+
+		for (i = 0; i < foreign->n_fields; i++) {
+			needed += 8
+				+ ut_strlen(foreign->foreign_col_names[i])
+				+ ut_strlen(foreign->referenced_col_names[i]);
+		}
+
+		if (needed > buf_size - (ulint)(buf2 - buf)) {
+			/* The constraint might overrun buf: leave it and
+			the remaining constraints out */
+
+			break;
+		}
+
+		buf2 += sprintf(buf2, ",\n  FOREIGN KEY (");
+
+		for (i = 0; i < foreign->n_fields; i++) {
+			buf2 += sprintf(buf2, "`%s`",
+					foreign->foreign_col_names[i]);
+
+			if (i + 1 < foreign->n_fields) {
+				buf2 += sprintf(buf2, ", ");
+			}
+		}
+
+		buf2 += sprintf(buf2, ") REFERENCES `");
+
+		name = buf2;	/* the table name starts here */
+
+		buf2 += sprintf(buf2, "%s` (",
+					foreign->referenced_table_name);
+
+		/* Change the last '/' in the table name to '.'; the scan
+		stops at the start of the name so that text written
+		earlier can never be altered */
+
+		for (ptr = buf2; ptr > name; ptr--) {
+			if (*(ptr - 1) == '/') {
+
+				*(ptr - 1) = '.';
+
+				break;
+			}
+		}
+
+		for (i = 0; i < foreign->n_fields; i++) {
+			buf2 += sprintf(buf2, "`%s`",
+					foreign->referenced_col_names[i]);
+			if (i + 1 < foreign->n_fields) {
+				buf2 += sprintf(buf2, ", ");
+			}
+		}
+
+		buf2 += sprintf(buf2, ")");
+
+		if (foreign->type == DICT_FOREIGN_ON_DELETE_CASCADE) {
+			buf2 += sprintf(buf2, " ON DELETE CASCADE");
+		}
+
+		if (foreign->type == DICT_FOREIGN_ON_DELETE_SET_NULL) {
+			buf2 += sprintf(buf2, " ON DELETE SET NULL");
+		}
+
+		foreign = UT_LIST_GET_NEXT(foreign_list, foreign);
+	}
+
+	mutex_exit(&(dict_sys->mutex));
+
+	/* Copy the string with its terminating NUL. If str has room for
+	fewer bytes than that, truncate the string to len - 1 chars. used
+	never exceeds the size of buf, also when len is bigger than that */
+
+	used = (ulint)(buf2 - buf) + 1;
+
+	if (used > len) {
+		used = len;
+		buf[used - 1] = '\0';
+	}
+
+	ut_memcpy(str, buf, used);
+}
+
+/**************************************************************************
 Sprintfs to a string info on foreign keys of a table. */
 
 void
 dict_print_info_on_foreign_keys(
 /*============================*/
+	ibool		create_table_format, /* in: if TRUE then print in
+				a format suitable to be inserted into
+				a CREATE TABLE, otherwise in the format
+				of SHOW TABLE STATUS */
 	char*		str,	/* in/out: pointer to a string */
 	ulint		len,	/* in: space in str available for info */
 	dict_table_t*	table)	/* in: table */
@@ -2903,6 +3158,12 @@ dict_print_info_on_foreign_keys(
 	char*		buf2;
 	char		buf[10000];
 
+	if (create_table_format) {
+		dict_print_info_on_foreign_keys_in_create_format(
+						buf, str, len, table);
+		return;
+	}
+
 	buf2 = buf;
 
 	mutex_enter(&(dict_sys->mutex));
@@ -2916,11 +3177,12 @@ dict_print_info_on_foreign_keys(
 	}
 
 	while (foreign != NULL) {
-		buf2 += sprintf(buf2, "; (");			
-		
+		buf2 += sprintf(buf2, "; (");
+
 		for (i = 0; i < foreign->n_fields; i++) {
 			buf2 += sprintf(buf2, "%s",
 					foreign->foreign_col_names[i]);
+			
 			if (i + 1 < foreign->n_fields) {
 				buf2 += sprintf(buf2, " ");
 			}
diff --git a/innobase/dict/dict0load.c b/innobase/dict/dict0load.c
index d803d28963d..221a6c7dabb 100644
--- a/innobase/dict/dict0load.c
+++ b/innobase/dict/dict0load.c
@@ -688,7 +688,16 @@ dict_load_indexes(
 		
 			dict_load_fields(table, index, heap);
 
-			dict_index_add_to_cache(table, index);
+			if ((index->type & DICT_CLUSTERED) == 0
+			    && NULL == dict_table_get_first_index(table)) {
+				/* A secondary index cannot come first */
+				fprintf(stderr,
+	"InnoDB: Error: trying to load index %s for table %s\n"
+	"InnoDB: but the first index was not clustered\n",
+				index->name, table->name);
+			} else {
+				dict_index_add_to_cache(table, index);
+			}
 		}
 
 		btr_pcur_move_to_next_user_rec(&pcur, &mtr);
@@ -937,6 +946,11 @@ dict_load_foreign(
 	foreign->n_fields = mach_read_from_4(rec_get_nth_field(rec, 5, &len));
 
 	ut_a(len == 4);
+
+	/* We store the type to the bits 24-31 of n_fields */
+	
+	foreign->type = foreign->n_fields >> 24;
+	foreign->n_fields = foreign->n_fields & 0xFFFFFF;
 	
 	foreign->id = mem_heap_alloc(foreign->heap, ut_strlen(id) + 1);
 				
diff --git a/innobase/dict/dict0mem.c b/innobase/dict/dict0mem.c
index 1f9a44aca35..52f46062065 100644
--- a/innobase/dict/dict0mem.c
+++ b/innobase/dict/dict0mem.c
@@ -61,7 +61,8 @@ dict_mem_table_create(
 	table->mem_fix = 0;
 
 	table->n_mysql_handles_opened = 0;
-
+	table->n_foreign_key_checks_running = 0;
+		
 	table->cached = FALSE;
 	
 	table->cols = mem_heap_alloc(heap, (n_cols + DATA_N_SYS_COLS)
@@ -235,6 +236,7 @@ dict_mem_foreign_create(void)
 
 	foreign->id = NULL;
 
+	foreign->type = 0;
 	foreign->foreign_table_name = NULL;
 	foreign->foreign_table = NULL;
 	foreign->foreign_col_names = NULL;
diff --git a/innobase/fil/fil0fil.c b/innobase/fil/fil0fil.c
index 727e52707e8..35f3792f041 100644
--- a/innobase/fil/fil0fil.c
+++ b/innobase/fil/fil0fil.c
@@ -89,8 +89,8 @@ struct fil_node_struct {
 	char*		name;	/* the file name or path */
 	ibool		open;	/* TRUE if file open */
 	os_file_t	handle;	/* OS handle to the file, if file open */
-	ulint		size;	/* size of the file in database blocks
-				(where the possible last incomplete block
+	ulint		size;	/* size of the file in database pages
+				(where the possible last incomplete megabyte
 				is ignored) */
 	ulint		n_pending;
 				/* count of pending i/o-ops on this file */
@@ -945,6 +945,76 @@ fil_node_complete_io(
 	}
 }
 		
+/**************************************************************************
+Tries to extend the last data file of tablespace 0 by the number of pages
+given, writing zero-filled megabytes. Fractions of a megabyte are ignored. */
+
+ibool
+fil_extend_last_data_file(
+/*======================*/
+				/* out: always TRUE; a failed write, e.g., a
+				full disk, shows only in *actual_increase */
+	ulint*	actual_increase,/* out: number of pages we were able to
+				extend; here the original size of the file and
+				the resulting size of the file are rounded
+				downwards to a full megabyte, and the
+				difference expressed in pages is returned */
+	ulint	size_increase)	/* in: try to extend this many pages */
+{
+	fil_system_t*	system		= fil_system;
+	fil_space_t*	space;
+	fil_node_t*	last_node;
+	byte*		zero_buf;
+	ibool		write_ok;
+	ulint		pages_per_mb	= (1024 * 1024) / UNIV_PAGE_SIZE;
+	ulint		n_megabytes	= size_increase / pages_per_mb;
+	ulint		n_written	= 0;
+
+	mutex_enter(&(system->mutex));
+
+	HASH_SEARCH(hash, system->spaces, 0, space, space->id == 0);
+	ut_a(space);
+
+	last_node = UT_LIST_GET_LAST(space->chain);
+
+	fil_node_prepare_for_io(last_node, system, space);
+
+	/* Every write below appends one megabyte of zeros to the file */
+	zero_buf = mem_alloc(1024 * 1024);
+	memset(zero_buf, '\0', 1024 * 1024);
+
+	while (n_written < n_megabytes) {
+		/* The byte offset of the current end of the file is given
+		as two words: the low 32 bits, and the bits above them */
+		write_ok = os_file_write(last_node->name, last_node->handle,
+			zero_buf,
+			(last_node->size << UNIV_PAGE_SIZE_SHIFT) & 0xFFFFFFFF,
+			last_node->size >> (32 - UNIV_PAGE_SIZE_SHIFT),
+			1024 * 1024);
+		if (!write_ok) {
+			break;
+		}
+		last_node->size += pages_per_mb;
+		space->size += pages_per_mb;
+		os_has_said_disk_full = FALSE;
+		n_written++;
+	}
+
+	mem_free(zero_buf);
+
+	fil_node_complete_io(last_node, system, OS_FILE_WRITE);
+
+	mutex_exit(&(system->mutex));
+
+	*actual_increase = n_written * pages_per_mb;
+
+	fil_flush(0);
+
+	srv_data_file_sizes[srv_n_data_files - 1] += *actual_increase;
+
+	return(TRUE);
+}
+
 /************************************************************************
 Reads or writes data. This operation is asynchronous (aio). */
 
@@ -966,9 +1036,9 @@ fil_io(
 	ulint	byte_offset,	/* in: remainder of offset in bytes; in
 				aio this must be divisible by the OS block
 				size */
-	ulint	len,		/* in: how many bytes to read; this must
-				not cross a file boundary; in aio this must
-				be a block size multiple */
+	ulint	len,		/* in: how many bytes to read or write; this
+				must not cross a file boundary; in aio this
+				must be a block size multiple */
 	void*	buf,		/* in/out: buffer where to store read data
 				or from where to write; in aio this must be
 				appropriately aligned */
diff --git a/innobase/fsp/fsp0fsp.c b/innobase/fsp/fsp0fsp.c
index 25fc2891e76..08608731f2e 100644
--- a/innobase/fsp/fsp0fsp.c
+++ b/innobase/fsp/fsp0fsp.c
@@ -50,7 +50,7 @@ descriptor page, but used only in the first. */
 #define	FSP_FREE_LIMIT		12	/* Minimum page number for which the
 					free list has not been initialized:
 					the pages >= this limit are, by
-					definition free */
+					definition, free */
 #define	FSP_LOWEST_NO_WRITE	16	/* The lowest page offset for which
 					the page has not been written to disk
 					(if it has been written, we know that
@@ -899,6 +899,106 @@ fsp_header_inc_size(
 }
 
 /**************************************************************************
+Gets the current free limit of a tablespace. The free limit means the
+place of the first page which has never been put to the free list
+for allocation. The space above that address is initialized to zero.
+Sets also the global variable log_fsp_current_free_limit. */
+
+ulint
+fsp_header_get_free_limit(
+/*======================*/
+			/* out: free limit in megabytes */
+	ulint	space)	/* in: space id */
+{
+	fsp_header_t*	header;
+	ulint		limit;
+	mtr_t		mtr;
+
+	ut_a(space == 0); /* We have only one log_fsp_current_... variable */
+	
+	mtr_start(&mtr);
+
+	mtr_x_lock(fil_space_get_latch(space), &mtr);	
+
+	header = fsp_get_space_header(space, &mtr);
+
+	limit = mtr_read_ulint(header + FSP_FREE_LIMIT, MLOG_4BYTES, &mtr);
+
+	limit = limit / ((1024 * 1024) / UNIV_PAGE_SIZE);
+	
+	log_fsp_current_free_limit_set_and_checkpoint(limit);
+
+	mtr_commit(&mtr);
+
+	return(limit);
+}
+
+/***************************************************************************
+Tries to extend the last data file if it is defined as auto-extending. */
+static
+ibool
+fsp_try_extend_last_file(
+/*=====================*/
+					/* out: FALSE if not auto-extending */
+	ulint*		actual_increase,/* out: actual increase in pages */
+	ulint		space,		/* in: space */
+	fsp_header_t*	header,		/* in: space header */
+	mtr_t*		mtr)		/* in: mtr */
+{
+	ulint	size;
+	ulint	size_increase;
+	ibool	success;
+
+	ut_a(space == 0);
+
+	*actual_increase = 0;
+
+	if (!srv_auto_extend_last_data_file) {
+
+		return(FALSE);
+	}
+
+	size = mtr_read_ulint(header + FSP_SIZE, MLOG_4BYTES, mtr);
+
+	if (srv_last_file_size_max != 0) {
+		if (srv_last_file_size_max
+			 < srv_data_file_sizes[srv_n_data_files - 1]) {
+
+			fprintf(stderr,
+"InnoDB: Error: Last data file size is %lu, max size allowed %lu\n",
+				srv_data_file_sizes[srv_n_data_files - 1],
+				srv_last_file_size_max);
+		}
+
+		size_increase = srv_last_file_size_max
+				 - srv_data_file_sizes[srv_n_data_files - 1];
+		if (size_increase > SRV_AUTO_EXTEND_INCREMENT) {
+			size_increase = SRV_AUTO_EXTEND_INCREMENT;
+		}
+	} else {
+		size_increase = SRV_AUTO_EXTEND_INCREMENT;
+	}
+				
+	if (size_increase == 0) {
+		return(TRUE);
+	}
+	
+	/* Extend the data file. If we are not able to extend
+	the full requested length, the function tells us
+	the number of full megabytes (but the unit is pages!)
+	we were able to extend. */
+				
+	success = fil_extend_last_data_file(actual_increase, size_increase);
+
+	if (success) {
+		mlog_write_ulint(header + FSP_SIZE, size + *actual_increase,
+							MLOG_4BYTES, mtr);
+	}
+
+	return(TRUE);
+}
+
+/**************************************************************************
 Puts new extents to the free list if there are free extents above the free
 limit. If an extent happens to contain an extent descriptor page, the extent
 is put to the FSP_FREE_FRAG list with the page marked as used. */
@@ -917,8 +1017,9 @@ fsp_fill_free_list(
 	ulint	frag_n_used;
 	page_t*	descr_page;
 	page_t*	ibuf_page;
-	mtr_t	ibuf_mtr;
+	ulint	actual_increase;
 	ulint	i;
+	mtr_t	ibuf_mtr;
 
 	ut_ad(header && mtr);
 	
@@ -926,12 +1027,28 @@ fsp_fill_free_list(
 	size = mtr_read_ulint(header + FSP_SIZE, MLOG_4BYTES, mtr);
 	limit = mtr_read_ulint(header + FSP_FREE_LIMIT, MLOG_4BYTES, mtr);
 
+	if (srv_auto_extend_last_data_file
+			&& size < limit + FSP_EXTENT_SIZE * FSP_FREE_ADD) {
+
+		/* Try to increase the last data file size */
+		fsp_try_extend_last_file(&actual_increase, space, header,
+									mtr);
+		size = mtr_read_ulint(header + FSP_SIZE, MLOG_4BYTES, mtr);
+	}
+
 	i = limit;
 		
 	while ((i + FSP_EXTENT_SIZE <= size) && (count < FSP_FREE_ADD)) {
 
 		mlog_write_ulint(header + FSP_FREE_LIMIT, i + FSP_EXTENT_SIZE,
 							MLOG_4BYTES, mtr); 
+
+		/* Update the free limit info in the log system and make
+		a checkpoint */
+		log_fsp_current_free_limit_set_and_checkpoint(
+				(i + FSP_EXTENT_SIZE)
+				/ ((1024 * 1024) / UNIV_PAGE_SIZE));
+
 		if (0 == i % XDES_DESCRIBED_PER_PAGE) {
 
 			/* We are going to initialize a new descriptor page
@@ -1172,6 +1289,7 @@ fsp_free_page(
 	xdes_t*		descr;
 	ulint		state;
 	ulint		frag_n_used;
+	char		buf[1000];
 	
 	ut_ad(mtr);
 
@@ -1183,10 +1301,38 @@ fsp_free_page(
 
 	state = xdes_get_state(descr, mtr);
 	
-	ut_a((state == XDES_FREE_FRAG) || (state == XDES_FULL_FRAG));
+	if (state != XDES_FREE_FRAG && state != XDES_FULL_FRAG) {
+		fprintf(stderr,
+"InnoDB: Error: File space extent descriptor of page %lu has state %lu\n",
+								page, state);
+		ut_sprintf_buf(buf, ((byte*)descr) - 50, 200);
 
-	ut_a(xdes_get_bit(descr, XDES_FREE_BIT, page % FSP_EXTENT_SIZE, mtr)
-								== FALSE);
+		fprintf(stderr, "InnoDB: Dump of descriptor: %s\n", buf);
+		
+		if (state == XDES_FREE) {
+			/* We put here some fault tolerance: if the page
+			is already free, return without doing anything! */
+
+			return;
+		}
+
+		ut_a(0);
+	}
+
+	if (xdes_get_bit(descr, XDES_FREE_BIT, page % FSP_EXTENT_SIZE, mtr)
+								== TRUE) {
+		fprintf(stderr,
+"InnoDB: Error: File space extent descriptor of page %lu says it is free\n",
+									page);
+		ut_sprintf_buf(buf, ((byte*)descr) - 50, 200);
+
+		fprintf(stderr, "InnoDB: Dump of descriptor: %s\n", buf);
+
+		/* We put here some fault tolerance: if the page
+		is already free, return without doing anything! */
+
+		return;
+	}
 
 	xdes_set_bit(descr, XDES_FREE_BIT, page % FSP_EXTENT_SIZE, TRUE, mtr);
 	xdes_set_bit(descr, XDES_CLEAN_BIT, page % FSP_EXTENT_SIZE, TRUE, mtr);
@@ -2243,13 +2389,15 @@ fsp_reserve_free_extents(
 	mtr_t*	mtr)	/* in: mtr */
 {
 	fsp_header_t*	space_header;
+	rw_lock_t*	latch;
 	ulint		n_free_list_ext;
 	ulint		free_limit;
 	ulint		size;
 	ulint		n_free;
 	ulint		n_free_up;
 	ulint		reserve;
-	rw_lock_t*	latch;
+	ibool		success;
+	ulint		n_pages_added;
 
 	ut_ad(mtr);	
 	ut_ad(!mutex_own(&kernel_mutex)
@@ -2260,7 +2408,7 @@ fsp_reserve_free_extents(
 	mtr_x_lock(latch, mtr);
 
 	space_header = fsp_get_space_header(space, mtr);
-
+try_again:
 	size = mtr_read_ulint(space_header + FSP_SIZE, MLOG_4BYTES, mtr);
 	
 	n_free_list_ext = flst_get_len(space_header + FSP_FREE, mtr);
@@ -2291,7 +2439,7 @@ fsp_reserve_free_extents(
 
 		if (n_free <= reserve + n_ext) {
 
-			return(FALSE);
+			goto try_to_extend;
 		}
 	} else if (alloc_type == FSP_UNDO) {
 		/* We reserve 1 % of the space size to cleaning operations */
@@ -2300,13 +2448,26 @@ fsp_reserve_free_extents(
 
 		if (n_free <= reserve + n_ext) {
 
-			return(FALSE);
+			goto try_to_extend;
 		}
 	} else {
 		ut_a(alloc_type == FSP_CLEANING);
 	}
 
-	return(fil_space_reserve_free_extents(space, n_free, n_ext));
+	success = fil_space_reserve_free_extents(space, n_free, n_ext);
+
+	if (success) {
+		return(TRUE);
+	}
+try_to_extend:
+	success = fsp_try_extend_last_file(&n_pages_added, space,
+							space_header, mtr);
+	if (success && n_pages_added > 0) {
+
+		goto try_again;
+	}
+
+	return(FALSE);
 }
 
 /**************************************************************************
diff --git a/innobase/include/btr0btr.h b/innobase/include/btr0btr.h
index bea85565125..d22f9d79c1c 100644
--- a/innobase/include/btr0btr.h
+++ b/innobase/include/btr0btr.h
@@ -49,6 +49,12 @@ inserted to the index, at the searched position */
 /* This flag ORed to latch mode says that we do the search in query
 optimization */
 #define BTR_ESTIMATE		1024
+
+/* This flag ORed to latch mode says that we can ignore possible
+UNIQUE definition on secondary indexes when we decide if we can use the
+insert buffer to speed up inserts */
+#define BTR_IGNORE_SEC_UNIQUE	2048	
+
 /******************************************************************
 Gets a buffer page and declares its latching order level. */
 UNIV_INLINE
diff --git a/innobase/include/buf0flu.h b/innobase/include/buf0flu.h
index cb1c0965a65..1b40acaa269 100644
--- a/innobase/include/buf0flu.h
+++ b/innobase/include/buf0flu.h
@@ -28,6 +28,16 @@ a margin of replaceable pages there. */
 void
 buf_flush_free_margin(void);
 /*=======================*/
+/************************************************************************
+Initializes a page for writing to the tablespace. */
+
+void
+buf_flush_init_for_writing(
+/*=======================*/
+	byte*	page,		/* in: page */
+	dulint	newest_lsn,	/* in: newest modification lsn to the page */
+	ulint	space,		/* in: space id */
+	ulint	page_no);	/* in: page number */
 /***********************************************************************
 This utility flushes dirty blocks from the end of the LRU list or flush_list.
 NOTE 1: in the case of an LRU flush the calling thread may own latches to
diff --git a/innobase/include/dict0dict.h b/innobase/include/dict0dict.h
index 0f6f516c2cb..fd79e17090a 100644
--- a/innobase/include/dict0dict.h
+++ b/innobase/include/dict0dict.h
@@ -105,7 +105,8 @@ dict_table_autoinc_initialize(
 	dict_table_t*	table,	/* in: table */
 	ib_longlong	value);	/* in: value which was assigned to a row */
 /************************************************************************
-Gets the next autoinc value, 0 if not yet initialized. */
+Gets the next autoinc value, 0 if not yet initialized. If initialized,
+increments the counter by 1. */
 
 ib_longlong
 dict_table_autoinc_get(
@@ -113,6 +114,15 @@ dict_table_autoinc_get(
 				/* out: value for a new row, or 0 */
 	dict_table_t*	table);	/* in: table */
 /************************************************************************
+Reads the autoinc counter value, 0 if not yet initialized. Does not
+increment the counter. */
+
+ib_longlong
+dict_table_autoinc_read(
+/*====================*/
+				/* out: value of the counter */
+	dict_table_t*	table);	/* in: table */
+/************************************************************************
 Updates the autoinc counter if the value supplied is bigger than the
 current value. If not inited, does nothing. */
 
@@ -143,7 +153,10 @@ dict_table_rename_in_cache(
 /*=======================*/
 					/* out: TRUE if success */
 	dict_table_t*	table,		/* in: table */
-	char*		new_name);	/* in: new name */
+	char*		new_name,	/* in: new name */
+	ibool		rename_also_foreigns);/* in: in ALTER TABLE we want
+					to preserve the original table name
+					in constraints which reference it */
 /**************************************************************************
 Adds a foreign key constraint object to the dictionary cache. May free
 the object if there already is an object with the same identifier in.
@@ -284,6 +297,10 @@ Sprintfs to a string info on foreign keys of a table. */
 void
 dict_print_info_on_foreign_keys(
 /*============================*/
+	ibool		create_table_format, /* in: if TRUE then print in
+				a format suitable to be inserted into
+				a CREATE TABLE, otherwise in the format
+				of SHOW TABLE STATUS */
 	char*		str,	/* in/out: pointer to a string */
 	ulint		len,	/* in: space in str available for info */
 	dict_table_t*	table);	/* in: table */
diff --git a/innobase/include/dict0mem.h b/innobase/include/dict0mem.h
index 5ef0103087a..ef15c99fdba 100644
--- a/innobase/include/dict0mem.h
+++ b/innobase/include/dict0mem.h
@@ -249,6 +249,8 @@ struct dict_foreign_struct{
 					this memory heap */
 	char*		id;		/* id of the constraint as a
 					null-terminated string */
+	ulint		type;		/* 0 or DICT_FOREIGN_ON_DELETE_CASCADE
+					or DICT_FOREIGN_ON_DELETE_SET_NULL */
 	char*		foreign_table_name;/* foreign table name */
 	dict_table_t*	foreign_table;	/* table where the foreign key is */
 	char**		foreign_col_names;/* names of the columns in the
@@ -278,6 +280,9 @@ struct dict_foreign_struct{
 					table */
 };
 
+#define DICT_FOREIGN_ON_DELETE_CASCADE	1
+#define DICT_FOREIGN_ON_DELETE_SET_NULL	2
+
 #define	DICT_INDEX_MAGIC_N	76789786
 
 /* Data structure for a database table */
@@ -313,6 +318,12 @@ struct dict_table_struct{
 				NOT allowed until this count gets to zero;
 				MySQL does NOT itself check the number of
 				open handles at drop */
+	ulint		n_foreign_key_checks_running;
+				/* count of how many foreign key check
+				operations are currently being performed
+				on the table: we cannot drop the table while
+				there are foreign key checks running on
+				it! */
 	ibool		cached;	/* TRUE if the table object has been added
 				to the dictionary cache */
 	lock_t*		auto_inc_lock;/* a buffer for an auto-inc lock
@@ -359,17 +370,16 @@ struct dict_table_struct{
 			        after database startup or table creation */
 	ulint		stat_modified_counter;
 				/* when a row is inserted, updated, or deleted,
-				we add the row length to this number; we
-				calculate new estimates for the stat_...
-				values for the table and the indexes at an
-				interval of 2 GB or when about 1 / 16 of table
-				has been modified; also
-				when the estimate operation is called
-				for MySQL SHOW TABLE STATUS; the counter is
-				reset to zero at statistics calculation;
-				this counter
-				is not protected by any latch, because this
-				is only used for heuristics */
+				we add 1 to this number; we calculate new
+				estimates for the stat_... values for the
+				table and the indexes at an interval of 2 GB
+				or when about 1 / 16 of table has been
+				modified; also when the estimate operation is
+				called for MySQL SHOW TABLE STATUS; the
+				counter is reset to zero at statistics
+				calculation; this counter is not protected by
+				any latch, because this is only used for
+				heuristics */
 	/*----------------------*/
 	mutex_t		autoinc_mutex;
 				/* mutex protecting the autoincrement
diff --git a/innobase/include/fil0fil.h b/innobase/include/fil0fil.h
index ca74ea4cb2c..63e20221c16 100644
--- a/innobase/include/fil0fil.h
+++ b/innobase/include/fil0fil.h
@@ -64,8 +64,10 @@ extern fil_addr_t	fil_addr_null;
 #define FIL_PAGE_DATA		38	/* start of the data on the page */
 
 /* File page trailer */
-#define FIL_PAGE_END_LSN	8	/* this should be same as
-					FIL_PAGE_LSN */
+#define FIL_PAGE_END_LSN	8	/* the low 4 bytes of this are used
+					to store the page checksum, the
+					last 4 bytes should be identical
+					to the last 4 bytes of FIL_PAGE_LSN */
 #define FIL_PAGE_DATA_END	8
 
 /* File page types */
@@ -134,6 +136,21 @@ fil_space_truncate_start(
 	ulint	trunc_len);	/* in: truncate by this much; it is an error
 				if this does not equal to the combined size of
 				some initial files in the space */
+/**************************************************************************
+Tries to extend a data file by the number of pages given. Any fractions of a
+megabyte are ignored. */
+
+ibool
+fil_extend_last_data_file(
+/*======================*/
+				/* out: TRUE if success, also if we run
+				out of disk space we may return TRUE */
+	ulint*	actual_increase,/* out: number of pages we were able to
+				extend, here the original size of the file and
+				the resulting size of the file are rounded
+				downwards to a full megabyte, and the
+				difference expressed in pages is returned */
+	ulint	size_increase);	/* in: try to extend this many pages */
 /***********************************************************************
 Frees a space object from a file system. Closes the files in the chain
 but does not delete them. */
diff --git a/innobase/include/fsp0fsp.h b/innobase/include/fsp0fsp.h
index e7f9eab330b..a0197ec2d97 100644
--- a/innobase/include/fsp0fsp.h
+++ b/innobase/include/fsp0fsp.h
@@ -46,6 +46,17 @@ void
 fsp_init(void);
 /*==========*/
 /**************************************************************************
+Gets the current free limit of a tablespace. The free limit means the
+place of the first page which has never been put to the the free list
+for allocation. The space above that address is initialized to zero.
+Sets also the global variable log_fsp_current_free_limit. */
+
+ulint
+fsp_header_get_free_limit(
+/*======================*/
+			/* out: free limit in megabytes */
+	ulint	space);	/* in: space id */
+/**************************************************************************
 Initializes the space header of a new created space. */
 
 void
diff --git a/innobase/include/ibuf0ibuf.h b/innobase/include/ibuf0ibuf.h
index 99fb1595f49..fac28461be4 100644
--- a/innobase/include/ibuf0ibuf.h
+++ b/innobase/include/ibuf0ibuf.h
@@ -127,7 +127,11 @@ UNIV_INLINE
 ibool
 ibuf_should_try(
 /*============*/
-	dict_index_t*	index);	/* in: index where to insert */
+	dict_index_t*	index,			/* in: index where to insert */
+	ulint		ignore_sec_unique);	/* in: if != 0, we should
+						ignore UNIQUE constraint on
+						a secondary index when we
+						decide */
 /**********************************************************************
 Returns TRUE if the current OS thread is performing an insert buffer
 routine. */
diff --git a/innobase/include/ibuf0ibuf.ic b/innobase/include/ibuf0ibuf.ic
index e969a0550da..0886c8c02cc 100644
--- a/innobase/include/ibuf0ibuf.ic
+++ b/innobase/include/ibuf0ibuf.ic
@@ -81,10 +81,16 @@ UNIV_INLINE
 ibool
 ibuf_should_try(
 /*============*/
-	dict_index_t*	index)	/* in: index where to insert */
+	dict_index_t*	index,			/* in: index where to insert */
+	ulint		ignore_sec_unique)	/* in: if != 0, we should
+						ignore UNIQUE constraint on
+						a secondary index when we
+						decide */
 {
-	if (!(index->type & (DICT_CLUSTERED | DICT_UNIQUE))
-					&& ibuf->meter > IBUF_THRESHOLD) {
+	if (!(index->type & DICT_CLUSTERED)
+	    && (ignore_sec_unique || !(index->type & DICT_UNIQUE))
+	    && ibuf->meter > IBUF_THRESHOLD) {
+
 		ibuf_flush_count++;
 
 		if (ibuf_flush_count % 8 == 0) {
diff --git a/innobase/include/log0log.h b/innobase/include/log0log.h
index adff9fae544..eeb4f2e45f1 100644
--- a/innobase/include/log0log.h
+++ b/innobase/include/log0log.h
@@ -26,6 +26,32 @@ extern 	ibool	log_debug_writes;
 #define	LOG_WAIT_ALL_GROUPS	93
 #define LOG_MAX_N_GROUPS	32
 
+/********************************************************************
+Sets the global variable log_fsp_current_free_limit. Also makes a checkpoint,
+so that we know that the limit has been written to a log checkpoint field
+on disk. */
+
+void
+log_fsp_current_free_limit_set_and_checkpoint(
+/*==========================================*/
+	ulint	limit);	/* in: limit to set */
+/***********************************************************************
+Calculates where in log files we find a specified lsn. */
+
+ulint
+log_calc_where_lsn_is(
+/*==================*/
+						/* out: log file number */
+	ib_longlong*	log_file_offset,	/* out: offset in that file
+						(including the header) */
+	dulint		first_header_lsn,	/* in: first log file start
+						lsn */
+	dulint		lsn,			/* in: lsn whose position to
+						determine */
+	ulint		n_log_files,		/* in: total number of log
+						files */
+	ib_longlong	log_file_size);		/* in: log file size
+						(including the header) */
 /****************************************************************
 Writes to the log the string given. The log must be released with
 log_release. */
@@ -225,6 +251,16 @@ Writes checkpoint info to groups. */
 void
 log_groups_write_checkpoint_info(void);
 /*==================================*/
+/**********************************************************
+Writes info to a buffer of a log group when log files are created in
+backup restoration. */
+
+void
+log_reset_first_header_and_checkpoint(
+/*==================================*/
+	byte*	hdr_buf,/* in: buffer which will be written to the start
+			of the first log file */
+	dulint	lsn);	/* in: lsn of the start of the first log file */
 /************************************************************************
 Starts an archiving operation. */
 
@@ -507,7 +543,16 @@ extern log_t*	log_sys;
 							+ LOG_MAX_N_GROUPS * 8)
 #define LOG_CHECKPOINT_CHECKSUM_1 	LOG_CHECKPOINT_ARRAY_END
 #define LOG_CHECKPOINT_CHECKSUM_2 	(4 + LOG_CHECKPOINT_ARRAY_END)
-#define LOG_CHECKPOINT_SIZE		(8 + LOG_CHECKPOINT_ARRAY_END)
+#define LOG_CHECKPOINT_FSP_FREE_LIMIT	(8 + LOG_CHECKPOINT_ARRAY_END)
+					/* current fsp free limit in the
+					tablespace, in units of one megabyte */
+#define LOG_CHECKPOINT_FSP_MAGIC_N	(12 + LOG_CHECKPOINT_ARRAY_END)
+					/* this magic number tells if the
+					checkpoint contains the above field:
+					the field was added to InnoDB-3.23.50 */
+#define LOG_CHECKPOINT_SIZE		(16 + LOG_CHECKPOINT_ARRAY_END)
+
+#define LOG_CHECKPOINT_FSP_MAGIC_N_VAL	1441231243
 
 /* Offsets of a log file header */
 #define LOG_GROUP_ID		0	/* log group number */
diff --git a/innobase/include/log0recv.h b/innobase/include/log0recv.h
index 8f896756db9..0825325965d 100644
--- a/innobase/include/log0recv.h
+++ b/innobase/include/log0recv.h
@@ -16,6 +16,39 @@ Created 9/20/1997 Heikki Tuuri
 #include "log0log.h"
 
 /***********************************************************************
+Reads the checkpoint info needed in hot backup. */
+
+ibool
+recv_read_cp_info_for_backup(
+/*=========================*/
+			/* out: TRUE if success */
+	byte*	hdr,	/* in: buffer containing the log group header */
+	dulint*	lsn,	/* out: checkpoint lsn */
+	ulint*	offset,	/* out: checkpoint offset in the log group */
+	ulint*	fsp_limit,/* out: fsp limit, 1000000000 if the database
+			is running with < version 3.23.50 of InnoDB */
+	dulint*	cp_no,	/* out: checkpoint number */
+	dulint*	first_header_lsn);
+			/* out: lsn of the start of the first log file */
+/***********************************************************************
+Scans the log segment and n_bytes_scanned is set to the length of valid
+log scanned. */
+
+void
+recv_scan_log_seg_for_backup(
+/*=========================*/
+	byte*		buf,		/* in: buffer containing log data */
+	ulint		buf_len,	/* in: data length in that buffer */
+	dulint*		scanned_lsn,	/* in/out: lsn of buffer start,
+					we return scanned lsn */
+	ulint*		scanned_checkpoint_no,
+					/* in/out: 4 lowest bytes of the
+					highest scanned checkpoint number so
+					far */
+	ulint*		n_bytes_scanned);/* out: how much we were able to
+					scan, smaller than buf_len if log
+					data ended here */
+/***********************************************************************
 Returns TRUE if recovery is currently running. */
 UNIV_INLINE
 ibool
@@ -35,6 +68,10 @@ read in, or also for a page already in the buffer pool. */
 void
 recv_recover_page(
 /*==============*/
+	ibool	recover_backup,	/* in: TRUE if we are recovering a backup
+				page: then we do not acquire any latches
+				since the page was read in outside the
+				buffer pool */
 	ibool	just_read_in,	/* in: TRUE if the i/o-handler calls this for
 				a freshly read page */
 	page_t*	page,		/* in: buffer page */
@@ -69,8 +106,15 @@ recv_scan_log_recs(
 /*===============*/
 				/* out: TRUE if limit_lsn has been reached, or
 				not able to scan any more in this log group */
+	ibool	apply_automatically,/* in: TRUE if we want this function to
+				apply log records automatically when the
+				hash table becomes full; in the hot backup tool
+				the tool does the applying, not this
+				function */
+	ulint	available_memory,/* in: we let the hash table of recs grow
+				to at most this size */
 	ibool	store_to_hash,	/* in: TRUE if the records should be stored
-				to the hash table; this is set FALSE if just
+				to the hash table; this is set to FALSE if just
 				debug checking is needed */
 	byte*	buf,		/* in: buffer containing a log segment or
 				garbage */
@@ -92,6 +136,16 @@ recv_reset_logs(
 	ibool	new_logs_created);/* in: TRUE if resetting logs is done
 				at the log creation; FALSE if it is done
 				after archive recovery */
+/**********************************************************
+Creates new log files after a backup has been restored. */
+
+void
+recv_reset_log_files_for_backup(
+/*============================*/
+	char*	log_dir,	/* in: log file directory path */
+	ulint	n_log_files,	/* in: number of log files */
+	ulint	log_file_size,	/* in: log file size */
+	dulint	lsn);		/* in: new start lsn */
 /************************************************************
 Creates the recovery system. */
 
@@ -102,8 +156,11 @@ recv_sys_create(void);
 Inits the recovery system for a recovery operation. */
 
 void
-recv_sys_init(void);
-/*===============*/
+recv_sys_init(
+/*==========*/
+	ibool	recover_from_backup,	/* in: TRUE if this is called
+					to recover from a hot backup */
+	ulint	available_memory);	/* in: available memory in bytes */
 /***********************************************************************
 Empties the hash table of stored log records, applying them to appropriate
 pages. */
@@ -118,6 +175,17 @@ recv_apply_hashed_log_recs(
 				disk and invalidated in buffer pool: this
 				alternative means that no new log records
 				can be generated during the application */
+/***********************************************************************
+Applies log records in the hash table to a backup. */
+
+void
+recv_apply_log_recs_for_backup(
+/*===========================*/
+	ulint	n_data_files,	/* in: number of data files */
+	char**	data_files,	/* in: array containing the paths to the
+				data files */
+	ulint*	file_sizes);	/* in: sizes of the data files in database
+				pages */
 /************************************************************
 Recovers from archived log files, and also from log files, if they exist. */
 
@@ -260,6 +328,14 @@ extern ibool		recv_recovery_on;
 extern ibool		recv_no_ibuf_operations;
 extern ibool		recv_needed_recovery;
 
+/* Size of the parsing buffer; it must accommodate RECV_SCAN_SIZE many
+times! */ 
+#define RECV_PARSING_BUF_SIZE	(2 * 1024 * 1024)
+
+/* Size of block reads when the log groups are scanned forward to do a
+roll-forward */
+#define RECV_SCAN_SIZE		(4 * UNIV_PAGE_SIZE)
+
 /* States of recv_addr_struct */
 #define RECV_NOT_PROCESSED	71
 #define RECV_BEING_READ		72
diff --git a/innobase/include/mem0mem.h b/innobase/include/mem0mem.h
index 89c5428f054..bfd25f5bdbe 100644
--- a/innobase/include/mem0mem.h
+++ b/innobase/include/mem0mem.h
@@ -41,11 +41,11 @@ page buffer pool; the latter method is used for very big heaps */
 
 /* The following start size is used for the first block in the memory heap if
 the size is not specified, i.e., 0 is given as the parameter in the call of
-create. The standard size is the maximum size of the blocks used for
+create. The standard size is the maximum (payload) size of the blocks used for
 allocations of small buffers. */
 
 #define MEM_BLOCK_START_SIZE            64
-#define MEM_BLOCK_STANDARD_SIZE         8192
+#define MEM_BLOCK_STANDARD_SIZE         8000
 
 /* If a memory heap is allowed to grow into the buffer pool, the following
 is the maximum size for a single allocated buffer: */
diff --git a/innobase/include/os0file.h b/innobase/include/os0file.h
index 411a9fb2c21..01fa12955ff 100644
--- a/innobase/include/os0file.h
+++ b/innobase/include/os0file.h
@@ -11,6 +11,12 @@ Created 10/21/1995 Heikki Tuuri
 
 #include "univ.i"
 
+
+/* If the following is set to TRUE, we do not call os_file_flush in every
+os_file_write */
+extern ibool	os_do_not_call_flush_at_each_write;
+extern ibool	os_has_said_disk_full;
+
 #ifdef __WIN__
 
 /* We define always WIN_ASYNC_IO, and check at run-time whether
@@ -19,12 +25,6 @@ Created 10/21/1995 Heikki Tuuri
 
 #define UNIV_NON_BUFFERED_IO
 
-#else
-
-#if defined(HAVE_AIO_H) && defined(HAVE_LIBRT)
-#define POSIX_ASYNC_IO
-#endif
-
 #endif
 
 #ifdef __WIN__
@@ -55,6 +55,9 @@ log. */
 #define	OS_FILE_CREATE			52
 #define OS_FILE_OVERWRITE		53
 
+#define OS_FILE_READ_ONLY 		333
+#define	OS_FILE_READ_WRITE		444
+
 /* Options for file_create */
 #define	OS_FILE_AIO			61
 #define	OS_FILE_NORMAL			62
@@ -118,6 +121,27 @@ os_get_os_version(void);
 /*===================*/
                   /* out: OS_WIN95, OS_WIN31, OS_WINNT (2000 == NT) */
 /********************************************************************
+Creates the seek mutexes used in positioned reads and writes. */
+
+void
+os_io_init_simple(void);
+/*===================*/
+/********************************************************************
+A simple function to open or create a file. */
+
+os_file_t
+os_file_create_simple(
+/*==================*/
+			/* out, own: handle to the file, not defined if error,
+			error number can be retrieved with os_get_last_error */
+	char*	name,	/* in: name of the file or path as a null-terminated
+			string */
+	ulint	create_mode,/* in: OS_FILE_OPEN if an existing file is opened
+			(if does not exist, error), or OS_FILE_CREATE if a new
+			file is created (if exists, error) */
+	ulint	access_type,/* in: OS_FILE_READ_ONLY or OS_FILE_READ_WRITE */
+	ibool*	success);/* out: TRUE if succeed, FALSE if error */
+/********************************************************************
 Opens an existing file or creates a new. */
 
 os_file_t
@@ -317,6 +341,8 @@ os_aio_windows_handle(
 	void**	message2,
 	ulint*	type);		/* out: OS_FILE_WRITE or ..._READ */
 #endif
+
+/* Currently we do not use Posix async i/o */
 #ifdef POSIX_ASYNC_IO
 /**************************************************************************
 This function is only used in Posix asynchronous i/o. Waits for an aio
diff --git a/innobase/include/os0sync.h b/innobase/include/os0sync.h
index 617f6b036fe..b2d613c4619 100644
--- a/innobase/include/os0sync.h
+++ b/innobase/include/os0sync.h
@@ -149,9 +149,9 @@ void
 os_mutex_free(
 /*==========*/
 	os_mutex_t	mutex);	/* in: mutex to free */
-#ifndef _WIN32
 /**************************************************************
-Acquires ownership of a fast mutex. */
+Acquires ownership of a fast mutex. Currently in Windows this is the same
+as os_fast_mutex_lock! */
 UNIV_INLINE
 ulint
 os_fast_mutex_trylock(
@@ -160,7 +160,6 @@ os_fast_mutex_trylock(
 						was reserved by another
 						thread */
 	os_fast_mutex_t*	fast_mutex);	/* in: mutex to acquire */
-#endif
 /**************************************************************
 Releases ownership of a fast mutex. */
 
diff --git a/innobase/include/os0sync.ic b/innobase/include/os0sync.ic
index aa00300dec5..6bff75d8ec6 100644
--- a/innobase/include/os0sync.ic
+++ b/innobase/include/os0sync.ic
@@ -10,9 +10,9 @@ Created 9/6/1995 Heikki Tuuri
 #include <winbase.h>
 #endif
 
-#ifndef _WIN32
 /**************************************************************
-Acquires ownership of a fast mutex. */
+Acquires ownership of a fast mutex. Currently in Windows this is the same
+as os_fast_mutex_lock! */
 UNIV_INLINE
 ulint
 os_fast_mutex_trylock(
@@ -23,20 +23,11 @@ os_fast_mutex_trylock(
 	os_fast_mutex_t*	fast_mutex)	/* in: mutex to acquire */
 {
 #ifdef __WIN__	
-	int	ret;
+	EnterCriticalSection(fast_mutex);
 
-	/* TODO: TryEnterCriticalSection is probably not found from
-	NT versions < 4! */
-	ret = TryEnterCriticalSection(fast_mutex);
-
-	if (ret) {
-		return(0);
-	}
-
-	return(1);
+	return(0);
 #else
 	return((ulint) pthread_mutex_trylock(fast_mutex));
 #endif
 }
-#endif
 
diff --git a/innobase/include/row0mysql.h b/innobase/include/row0mysql.h
index 48b6ba8a715..13b3dffd874 100644
--- a/innobase/include/row0mysql.h
+++ b/innobase/include/row0mysql.h
@@ -209,6 +209,27 @@ row_update_for_mysql(
 	row_prebuilt_t*	prebuilt);	/* in: prebuilt struct in MySQL
 					handle */
 /*************************************************************************
+Creates a query graph node of 'update' type to be used in the MySQL
+interface. */
+
+upd_node_t*
+row_create_update_node_for_mysql(
+/*=============================*/
+				/* out, own: update node */
+	dict_table_t*	table,	/* in: table to update */
+	mem_heap_t*	heap);	/* in: mem heap from which allocated */
+/**************************************************************************
+Does a cascaded delete or set null in a foreign key operation. */
+
+ulint
+row_update_cascade_for_mysql(
+/*=========================*/
+				/* out: error code or DB_SUCCESS */
+	que_thr_t*	thr,	/* in: query thread */
+	upd_node_t*	node,	/* in: update node used in the cascade
+				or set null operation */
+	dict_table_t*	table);	/* in: table where we do the operation */
+/*************************************************************************
 Does a table creation operation for MySQL. If the name of the created
 table ends to characters INNODB_MONITOR, then this also starts
 printing of monitor output by the master thread. */
@@ -402,13 +423,13 @@ struct row_prebuilt_struct {
 	byte*		ins_upd_rec_buff;/* buffer for storing data converted
 					to the Innobase format from the MySQL
 					format */
-	ibool		in_update_remember_pos;
-					/* if an update is processed, then if
-					this flag is set to TRUE, it means
-					that the stored cursor position in
-					SELECT is the right position also
-					for the update: we can just restore
-					the cursor and save CPU time */
+	ibool		hint_no_need_to_fetch_extra_cols;
+					/* normally this is TRUE, but
+					MySQL will set this to FALSE
+					if we might be required to fetch also
+					other columns than mentioned in the
+					query: the clustered index column(s),
+					or an auto-increment column */
 	upd_node_t*	upd_node;	/* Innobase SQL update node used
 					to perform updates and deletes */
 	que_fork_t*	ins_graph;	/* Innobase SQL query graph used
diff --git a/innobase/include/row0upd.h b/innobase/include/row0upd.h
index 106d3866b25..9a3e2463267 100644
--- a/innobase/include/row0upd.h
+++ b/innobase/include/row0upd.h
@@ -312,6 +312,11 @@ struct upd_node_struct{
 	ibool		in_mysql_interface;
 				/* TRUE if the update node was created
 				for the MySQL interface */
+	upd_node_t*	cascade_node;/* NULL or an update node template which
+				is used to implement ON DELETE CASCADE
+				or ... SET NULL for foreign keys */
+	mem_heap_t*	cascade_heap;/* NULL or a mem heap where the cascade
+				node is created */
 	sel_node_t*	select;	/* query graph subtree implementing a base
 				table cursor: the rows returned will be
 				updated */
@@ -322,6 +327,11 @@ struct upd_node_struct{
 				of the MySQL interface */
 	dict_table_t*	table;	/* table where updated */
 	upd_t*		update;	/* update vector for the row */
+	ulint		update_n_fields;
+				/* when this struct is used to implement
+				a cascade operation for foreign keys, we store
+				here the size of the buffer allocated for use
+				as the update vector */
 	sym_node_list_t	columns;/* symbol table nodes for the columns
 				to retrieve from the table */
 	ibool		has_clust_rec_x_lock;
diff --git a/innobase/include/srv0srv.h b/innobase/include/srv0srv.h
index 2b40852fe67..6777a24e7db 100644
--- a/innobase/include/srv0srv.h
+++ b/innobase/include/srv0srv.h
@@ -24,10 +24,13 @@ extern char	srv_fatal_errbuf[];
 thread starts running */
 extern os_event_t	srv_lock_timeout_thread_event;
 
+/* If the last data file is auto-extended, we add this many pages to it
+at a time */
+#define SRV_AUTO_EXTEND_INCREMENT   (8 * ((1024 * 1024) / UNIV_PAGE_SIZE))
+
 /* Server parameters which are read from the initfile */
 
 extern char*	srv_data_home;
-extern char*	srv_logs_home;
 extern char*	srv_arch_dir;
 
 extern ulint	srv_n_data_files;
@@ -35,6 +38,9 @@ extern char**	srv_data_file_names;
 extern ulint*	srv_data_file_sizes;
 extern ulint*   srv_data_file_is_raw_partition;
 
+extern ibool	srv_auto_extend_last_data_file;
+extern ulint	srv_last_file_size_max;
+
 extern ibool	srv_created_new_raw;
 
 #define SRV_NEW_RAW    1
@@ -186,6 +192,19 @@ srv_boot(void);
 /*==========*/
 			/* out: DB_SUCCESS or error code */
 /*************************************************************************
+Initializes the server. */
+
+void
+srv_init(void);
+/*==========*/
+/*************************************************************************
+Initializes the synchronization primitives, memory system, and the thread
+local storage. */
+
+void
+srv_general_init(void);
+/*==================*/
+/*************************************************************************
 Gets the number of threads in the system. */
 
 ulint
diff --git a/innobase/include/srv0start.h b/innobase/include/srv0start.h
index 6dbdcd27250..01ac063e1c9 100644
--- a/innobase/include/srv0start.h
+++ b/innobase/include/srv0start.h
@@ -12,6 +12,56 @@ Created 10/10/1995 Heikki Tuuri
 
 #include "univ.i"
 
+/*************************************************************************
+Normalizes a directory path for Windows: converts slashes to backslashes. */
+
+void
+srv_normalize_path_for_win(
+/*=======================*/
+	char*	str);	/* in/out: null-terminated character string */
+/*************************************************************************
+Adds a slash or a backslash to the end of a string if it is missing
+and the string is not empty. */
+
+char*
+srv_add_path_separator_if_needed(
+/*=============================*/
+			/* out, own: string which has the separator if the
+			string is not empty */
+	char*	str);	/* in: null-terminated character string */
+/*************************************************************************
+Reads the data files and their sizes from a character string given in
+the .cnf file. */
+
+ibool
+srv_parse_data_file_paths_and_sizes(
+/*================================*/
+					/* out: TRUE if ok, FALSE if parsing
+					error */
+	char*	str,			/* in: the data file path string */
+	char***	data_file_names,	/* out, own: array of data file
+					names */
+	ulint**	data_file_sizes,	/* out, own: array of data file sizes
+					in megabytes */
+	ulint**	data_file_is_raw_partition,/* out, own: array of flags
+					showing which data files are raw
+					partitions */
+	ulint*	n_data_files,		/* out: number of data files */
+	ibool*	is_auto_extending,	/* out: TRUE if the last data file is
+					auto-extending */
+	ulint*	max_auto_extend_size);	/* out: max auto extend size for the
+					last file if specified, 0 if not */
+/*************************************************************************
+Reads log group home directories from a character string given in
+the .cnf file. */
+
+ibool
+srv_parse_log_group_home_dirs(
+/*==========================*/
+					/* out: TRUE if ok, FALSE if parsing
+					error */
+	char*	str,			/* in: character string */
+	char***	log_group_home_dirs);	/* out, own: log group home dirs */
 /********************************************************************
 Starts Innobase and creates a new database if database files
 are not found and the user wants. Server parameters are
diff --git a/innobase/include/sync0rw.ic b/innobase/include/sync0rw.ic
index 09580cfc497..43e9202360b 100644
--- a/innobase/include/sync0rw.ic
+++ b/innobase/include/sync0rw.ic
@@ -357,7 +357,7 @@ rw_lock_s_unlock_func(
 
 	/* Reset the shared lock by decrementing the reader count */
 
-	ut_ad(lock->reader_count > 0);
+	ut_a(lock->reader_count > 0);
 	lock->reader_count--;
 
 	#ifdef UNIV_SYNC_DEBUG
diff --git a/innobase/include/sync0sync.h b/innobase/include/sync0sync.h
index 6c3bff66e27..4f55709a5d7 100644
--- a/innobase/include/sync0sync.h
+++ b/innobase/include/sync0sync.h
@@ -359,12 +359,17 @@ V
 Memory pool mutex */
 
 /* Latching order levels */
+
+/* User transaction locks are higher than any of the latch levels below:
+no latches are allowed when a thread goes to wait for a normal table
+or row lock! */
+#define SYNC_USER_TRX_LOCK	9999
 #define SYNC_NO_ORDER_CHECK	3000	/* this can be used to suppress
 					latching order checking */
 #define	SYNC_LEVEL_NONE		2000	/* default: level not defined */
+#define	SYNC_FOREIGN_KEY_CHECK	1001
 #define SYNC_DICT		1000
 #define SYNC_DICT_AUTOINC_MUTEX	999
-#define	SYNC_FOREIGN_KEY_CHECK	998
 #define	SYNC_PURGE_IS_RUNNING	997
 #define SYNC_DICT_HEADER	995
 #define SYNC_IBUF_HEADER	914
@@ -429,7 +434,7 @@ implementation of a mutual exclusion semaphore. */
 struct mutex_struct {
 	ulint	lock_word;	/* This ulint is the target of the atomic
 				test-and-set instruction in Win32 */
-#ifndef _WIN32
+#if !defined(_WIN32) || !defined(UNIV_CAN_USE_X86_ASSEMBLER)
 	os_fast_mutex_t
 		os_fast_mutex;	/* In other systems we use this OS mutex
 				in place of lock_word */
diff --git a/innobase/include/sync0sync.ic b/innobase/include/sync0sync.ic
index 9531377ce0b..9014eb5fb54 100644
--- a/innobase/include/sync0sync.ic
+++ b/innobase/include/sync0sync.ic
@@ -53,7 +53,7 @@ mutex_test_and_set(
 				1 */
 	mutex_t*	mutex)	/* in: mutex */
 {
-#ifdef _WIN32
+#if defined(_WIN32) && defined(UNIV_CAN_USE_X86_ASSEMBLER)
 	ulint	res;
 	ulint*	lw;		/* assembler code is used to ensure that
 				lock_word is loaded from memory */
@@ -120,7 +120,7 @@ mutex_reset_lock_word(
 /*==================*/
 	mutex_t*	mutex)	/* in: mutex */
 {
-#ifdef _WIN32
+#if defined(_WIN32) && defined(UNIV_CAN_USE_X86_ASSEMBLER)
 	ulint*	lw;		/* assembler code is used to ensure that
 				lock_word is loaded from memory */
 	ut_ad(mutex);
diff --git a/innobase/include/trx0sys.h b/innobase/include/trx0sys.h
index f2eded697ec..60d5adb72d1 100644
--- a/innobase/include/trx0sys.h
+++ b/innobase/include/trx0sys.h
@@ -44,6 +44,15 @@ half-written pages in the data files. */
 void
 trx_sys_doublewrite_restore_corrupt_pages(void);
 /*===========================================*/
+/********************************************************************
+Determines if a page number is located inside the doublewrite buffer. */
+
+ibool
+trx_doublewrite_page_inside(
+/*========================*/
+				/* out: TRUE if the location is inside
+				the two blocks of the doublewrite buffer */
+	ulint	page_no);	/* in: page number */
 /*******************************************************************
 Checks if a page address is the trx sys header page. */
 UNIV_INLINE
@@ -250,7 +259,7 @@ therefore 256 */
 /* The offset of the transaction system header on the page */
 #define	TRX_SYS		FSEG_PAGE_DATA
 
-/* Transaction system header; protected by trx_sys->mutex */
+/* Transaction system header */
 /*-------------------------------------------------------------*/
 #define	TRX_SYS_TRX_ID_STORE	0	/* the maximum trx id or trx number
 					modulo TRX_SYS_TRX_ID_UPDATE_MARGIN
diff --git a/innobase/include/trx0sys.ic b/innobase/include/trx0sys.ic
index 786e7905933..ada2d8cb19c 100644
--- a/innobase/include/trx0sys.ic
+++ b/innobase/include/trx0sys.ic
@@ -93,7 +93,6 @@ trx_sysf_get(
 {
 	trx_sysf_t*	header;
 
-	ut_ad(mutex_own(&(kernel_mutex)));
 	ut_ad(mtr);
 	
 	header = TRX_SYS + buf_page_get(TRX_SYS_SPACE, TRX_SYS_PAGE_NO,
diff --git a/innobase/include/trx0trx.h b/innobase/include/trx0trx.h
index 26c9ace08b6..261f33d3dc3 100644
--- a/innobase/include/trx0trx.h
+++ b/innobase/include/trx0trx.h
@@ -298,6 +298,17 @@ struct trx_struct{
 					of view of concurrency control:
 					TRX_ACTIVE, TRX_COMMITTED_IN_MEMORY,
 					... */
+	ibool		check_foreigns;	/* normally TRUE, but if the user
+					wants to suppress foreign key checks,
+					(in table imports, for example) we
+					set this FALSE */
+	ibool		check_unique_secondary;
+					/* normally TRUE, but if the user
+					wants to speed up inserts by
+					suppressing unique key checks
+					for secondary indexes when we decide
+					if we can use the insert buffer for
+					them, we set this FALSE */
 	dulint		id;		/* transaction id */
 	dulint		no;		/* transaction serialization number ==
 					max trx id when the transaction is 
@@ -328,6 +339,9 @@ struct trx_struct{
                                         /* how many tables the current SQL
 					statement uses, except those
 					in consistent read */
+	ibool		has_dict_foreign_key_check_lock;
+					/* TRUE if the trx currently holds
+					an s-lock on dict_foreign_... */
         ibool           has_search_latch;
 			                /* TRUE if this trx has latched the
 			                search system latch in S-mode */
diff --git a/innobase/include/univ.i b/innobase/include/univ.i
index 8870d80f611..160a435319a 100644
--- a/innobase/include/univ.i
+++ b/innobase/include/univ.i
@@ -14,6 +14,13 @@ Created 1/20/1994 Heikki Tuuri
 
 #include <windows.h>
 
+/* When compiling for Itanium IA64, undefine the flag below to prevent use
+of 32-bit assembler */
+
+#ifndef WIN64
+#define UNIV_CAN_USE_X86_ASSEMBLER
+#endif
+
 /* If you want to check for errors with compiler level -W4,
 comment out the above include of windows.h and let the following defines
 be defined:
@@ -71,13 +78,14 @@ memory is read outside the allocated blocks. */
 */
 
 /* Make a non-inline debug version */
+
 /*
 #define UNIV_DEBUG
 #define UNIV_MEM_DEBUG
-#define UNIV_SEARCH_DEBUG
+#define UNIV_SYNC_DEBUG
 
 #define UNIV_IBUF_DEBUG
-#define UNIV_SYNC_DEBUG
+#define UNIV_SEARCH_DEBUG
 #define UNIV_SYNC_PERF_STAT
 #define UNIV_SEARCH_PERF_STAT
 */
diff --git a/innobase/include/ut0byte.h b/innobase/include/ut0byte.h
index b45f2160392..4fb45221899 100644
--- a/innobase/include/ut0byte.h
+++ b/innobase/include/ut0byte.h
@@ -55,6 +55,15 @@ ut_dulint_get_low(
 			/* out: 32 bits in ulint */
 	dulint	d);	/* in: dulint */
 /***********************************************************
+Converts a dulint (a struct of 2 ulints) to ib_longlong, which is a 64-bit
+integer type. */
+UNIV_INLINE
+ib_longlong
+ut_conv_dulint_to_longlong(
+/*=======================*/
+			/* out: value in ib_longlong type */
+	dulint	d);	/* in: dulint */
+/***********************************************************
 Tests if a dulint is zero. */
 UNIV_INLINE
 ibool
diff --git a/innobase/include/ut0byte.ic b/innobase/include/ut0byte.ic
index b8170392c8f..f0df9cc35a3 100644
--- a/innobase/include/ut0byte.ic
+++ b/innobase/include/ut0byte.ic
@@ -52,6 +52,20 @@ ut_dulint_get_low(
 }
 
 /***********************************************************
+Converts a dulint (a struct of 2 ulints) to ib_longlong, which is a 64-bit
+integer type. */
+UNIV_INLINE
+ib_longlong
+ut_conv_dulint_to_longlong(
+/*=======================*/
+			/* out: d.low + (d.high << 32) as an ib_longlong */
+	dulint	d)	/* in: dulint whose low and high fields are combined */
+{
+	return((ib_longlong)d.low	/* low field is added unshifted */
+		+ (((ib_longlong)d.high) << 32)); /* high field moves up 32 bits */
+}
+
+/***********************************************************
 Tests if a dulint is zero. */
 UNIV_INLINE
 ibool
diff --git a/innobase/include/ut0rnd.h b/innobase/include/ut0rnd.h
index a30251e6da0..c8ef0dd4001 100644
--- a/innobase/include/ut0rnd.h
+++ b/innobase/include/ut0rnd.h
@@ -35,7 +35,7 @@ ut_rnd_gen_next_ulint(
 /*************************************************************
 The following function generates 'random' ulint integers which
 enumerate the value space (let there be N of them) of ulint integers
-in a pseudo random fashion. Note that the same integer is repeated
+in a pseudo-random fashion. Note that the same integer is repeated
 always after N calls to the generator. */
 UNIV_INLINE
 ulint
diff --git a/innobase/include/ut0ut.h b/innobase/include/ut0ut.h
index 4366b832ff6..338460d7de9 100644
--- a/innobase/include/ut0ut.h
+++ b/innobase/include/ut0ut.h
@@ -17,6 +17,16 @@ Created 1/20/1994 Heikki Tuuri
 
 typedef time_t	ib_time_t;
 
+/************************************************************
+Gets the high 32 bits in a ulint. That is, it makes a shift >> 32,
+but since there seem to be compiler bugs in both gcc and Visual C++,
+we do this by a special conversion. */
+
+ulint
+ut_get_high32(
+/*==========*/
+			/* out: a >> 32 */
+	ulint	a);	/* in: ulint */
 /**********************************************************
 Calculates the minimum of two ulints. */
 UNIV_INLINE
@@ -144,6 +154,15 @@ void
 ut_print_timestamp(
 /*===============*/
 	FILE*  file); /* in: file where to print */
+/**************************************************************
+Returns current year, month, day. */
+
+void
+ut_get_year_month_day(
+/*==================*/
+	ulint*	year,	/* out: current year */
+	ulint*	month,	/* out: month */
+	ulint*	day);	/* out: day */
 /*****************************************************************
 Runs an idle loop on CPU. The argument gives the desired delay
 in microseconds on 100 MHz Pentium + Visual C++. */
diff --git a/innobase/lock/lock0lock.c b/innobase/lock/lock0lock.c
index f2309a5c562..b168ba41771 100644
--- a/innobase/lock/lock0lock.c
+++ b/innobase/lock/lock0lock.c
@@ -1535,6 +1535,8 @@ lock_rec_enqueue_waiting(
 
 	if (que_thr_stop(thr)) {
 
+		ut_a(0);
+
 		return(DB_QUE_THR_SUSPENDED);
 	}
 		
@@ -2919,6 +2921,7 @@ lock_table_enqueue_waiting(
 	stopped anyway */
 
 	if (que_thr_stop(thr)) {
+		ut_a(0);
 
 		return(DB_QUE_THR_SUSPENDED);
 	}
diff --git a/innobase/log/log0log.c b/innobase/log/log0log.c
index e787176bb21..5ec1274d117 100644
--- a/innobase/log/log0log.c
+++ b/innobase/log/log0log.c
@@ -1,7 +1,7 @@
 /******************************************************
 Database log
 
-(c) 1995-1997 InnoDB Oy
+(c) 1995-1997 Innobase Oy
 
 Created 12/9/1995 Heikki Tuuri
 *******************************************************/
@@ -24,6 +24,9 @@ Created 12/9/1995 Heikki Tuuri
 #include "trx0sys.h"
 #include "trx0trx.h"
 
+/* Current free limit; protected by the log sys mutex; 0 means uninitialized */
+ulint	log_fsp_current_free_limit		= 0;
+
 /* Global log system variable */
 log_t*	log_sys	= NULL;
 
@@ -95,6 +98,32 @@ void
 log_archive_margin(void);
 /*====================*/
 
+/********************************************************************
+Stores limit in the global variable log_fsp_current_free_limit under the log
+sys mutex, and then calls log_checkpoint() until it succeeds, so that we know
+that the limit has been written to a log checkpoint field on disk. */
+
+void
+log_fsp_current_free_limit_set_and_checkpoint(
+/*==========================================*/
+	ulint	limit)	/* in: new value of log_fsp_current_free_limit */
+{
+	ibool	success;	/* result of the latest log_checkpoint() call */
+
+	mutex_enter(&(log_sys->mutex));
+
+	log_fsp_current_free_limit = limit;
+
+	mutex_exit(&(log_sys->mutex));
+
+	/* Try to make a synchronous checkpoint; the log sys mutex has been
+	released above, and we retry until log_checkpoint() reports success */
+	success = FALSE;
+
+	while (!success) {
+		success = log_checkpoint(TRUE, TRUE);
+	}
+}
 
 /********************************************************************
 Returns the oldest modified block lsn in the pool, or log_sys->lsn if none
@@ -436,6 +465,51 @@ log_group_calc_lsn_offset(
 	return(log_group_calc_real_offset(offset, group));
 }
 
+/***********************************************************************
+Calculates where in log files we find a specified lsn. */
+
+ulint
+log_calc_where_lsn_is(
+/*==================*/
+						/* out: log file number */
+	ib_longlong*	log_file_offset,	/* out: offset in that file
+						(including the header) */
+	dulint		first_header_lsn,	/* in: first log file start
+						lsn */
+	dulint		lsn,			/* in: lsn whose position to
+						determine */
+	ulint		n_log_files,		/* in: total number of log
+						files */
+	ib_longlong	log_file_size)		/* in: log file size
+						(including the header) */
+{
+	ib_longlong	ib_lsn;			/* lsn as a 64-bit integer */
+	ib_longlong	ib_first_header_lsn;	/* first_header_lsn likewise */
+	ib_longlong	capacity	= log_file_size - LOG_FILE_HDR_SIZE;
+	ulint		file_no;
+	ib_longlong	add_this_many;	/* multiples of the total capacity */
+	/* capacity = bytes of log data in one file, the header excluded */
+	ib_lsn = ut_conv_dulint_to_longlong(lsn);
+	ib_first_header_lsn = ut_conv_dulint_to_longlong(first_header_lsn);
+	/* Raise an lsn below the first header lsn by whole total capacities */
+	if (ib_lsn < ib_first_header_lsn) {
+		add_this_many = 1 + (ib_first_header_lsn - ib_lsn)
+				/ (capacity * (ib_longlong)n_log_files);
+		ib_lsn += add_this_many
+		          * capacity * (ib_longlong)n_log_files;
+	}
+
+	ut_a(ib_lsn >= ib_first_header_lsn);	/* check the adjustment */
+	/* Whole files between the two lsns, taken modulo the file count */
+	file_no = ((ulint)((ib_lsn - ib_first_header_lsn) / capacity))
+			  % n_log_files;
+	*log_file_offset = (ib_lsn - ib_first_header_lsn) % capacity;
+	/* Make the offset count from the file start, header included */
+	*log_file_offset = *log_file_offset + LOG_FILE_HDR_SIZE;
+
+	return(file_no);
+}
+
 /************************************************************
 Sets the field values in group to correspond to a given lsn. For this function
 to work, the values must already be correctly initialized to correspond to
@@ -653,7 +727,7 @@ log_init(void)
 
 #ifdef UNIV_LOG_DEBUG
 	recv_sys_create();
-	recv_sys_init();
+	recv_sys_init(FALSE, buf_pool_get_curr_size());
 
 	recv_sys->parse_start_lsn = log_sys->lsn;
 	recv_sys->scanned_lsn = log_sys->lsn;
@@ -961,7 +1035,7 @@ log_group_write_buf(
 	ibool	sync;
 	ibool	write_header;
 	ulint	next_offset;
-
+	
 	ut_ad(mutex_own(&(log_sys->mutex)));
 	ut_ad(len % OS_FILE_LOG_BLOCK_SIZE == 0);
 	ut_ad(ut_dulint_get_low(start_lsn) % OS_FILE_LOG_BLOCK_SIZE == 0);
@@ -1002,9 +1076,28 @@ loop:
 	}
 	
 	if (log_debug_writes) {
+		ulint	i;
+
+		printf(
+		"Writing log file segment to group %lu offset %lu len %lu\n"
+		"start lsn %lu %lu\n",
+			group->id, next_offset, write_len,
+			ut_dulint_get_high(start_lsn),
+			ut_dulint_get_low(start_lsn));
 		printf(
-		"Writing log file segment to group %lu offset %lu len %lu\n",
-					group->id, next_offset, write_len);
+		"First block n:o %lu last block n:o %lu\n",
+			log_block_get_hdr_no(buf),
+			log_block_get_hdr_no(
+				buf + write_len - OS_FILE_LOG_BLOCK_SIZE));
+		ut_a(log_block_get_hdr_no(buf)
+			== log_block_convert_lsn_to_no(start_lsn));
+		
+		for (i = 0; i < write_len / OS_FILE_LOG_BLOCK_SIZE; i++) {
+
+			ut_a(log_block_get_hdr_no(buf) + i
+				== log_block_get_hdr_no(buf
+					+ i * OS_FILE_LOG_BLOCK_SIZE));
+		}
 	}
 
 	if (log_do_write) {
@@ -1346,7 +1439,7 @@ log_group_checkpoint(
 	ulint	i;
 
 	ut_ad(mutex_own(&(log_sys->mutex)));
-	ut_ad(LOG_CHECKPOINT_SIZE <= OS_FILE_LOG_BLOCK_SIZE);
+	ut_a(LOG_CHECKPOINT_SIZE <= OS_FILE_LOG_BLOCK_SIZE);
 	
 	buf = group->checkpoint_buf;
 	
@@ -1394,6 +1487,15 @@ log_group_checkpoint(
 			LOG_CHECKPOINT_CHECKSUM_2 - LOG_CHECKPOINT_LSN);
 	mach_write_to_4(buf + LOG_CHECKPOINT_CHECKSUM_2, fold);
 
+	/* Starting from InnoDB-3.23.50, we also write info on allocated
+	size in the tablespace */
+
+	mach_write_to_4(buf + LOG_CHECKPOINT_FSP_FREE_LIMIT,
+						log_fsp_current_free_limit);
+
+	mach_write_to_4(buf + LOG_CHECKPOINT_FSP_MAGIC_N,
+					LOG_CHECKPOINT_FSP_MAGIC_N_VAL);
+
 	/* We alternate the physical place of the checkpoint info in the first
 	log file */
 	
@@ -1429,6 +1531,48 @@ log_group_checkpoint(
 }
 
 /**********************************************************
+Writes info to a buffer of a log group when log files are created in
+backup restoration. */
+
+void
+log_reset_first_header_and_checkpoint(
+/*==================================*/
+	byte*	hdr_buf,/* in: buffer which will be written to the start
+			of the first log file */
+	dulint	lsn)	/* in: lsn of the start of the first log file
+			+ LOG_BLOCK_HDR_SIZE; stored as is in both lsn fields */
+{
+	ulint	fold;	/* checksum value being stored */
+	byte*	buf;	/* start of the checkpoint slot inside hdr_buf */
+	/* Header fields of the buffer: group id 0 and the start lsn */
+	mach_write_to_4(hdr_buf + LOG_GROUP_ID, 0);
+	mach_write_to_8(hdr_buf + LOG_FILE_START_LSN, lsn);
+	/* Only the checkpoint slot at LOG_CHECKPOINT_1 is filled in below */
+	buf = hdr_buf + LOG_CHECKPOINT_1;
+	
+	mach_write_to_8(buf + LOG_CHECKPOINT_NO, ut_dulint_zero);
+	mach_write_to_8(buf + LOG_CHECKPOINT_LSN, lsn);
+
+	mach_write_to_4(buf + LOG_CHECKPOINT_OFFSET,
+				LOG_FILE_HDR_SIZE + LOG_BLOCK_HDR_SIZE);
+	/* The log buffer size is recorded as a fixed 2 * 1024 * 1024 bytes */
+	mach_write_to_4(buf + LOG_CHECKPOINT_LOG_BUF_SIZE, 2 * 1024 * 1024);
+
+	mach_write_to_8(buf + LOG_CHECKPOINT_ARCHIVED_LSN, ut_dulint_max);
+	/* Checksum 1 is folded over the slot bytes before its own field */
+	fold = ut_fold_binary(buf, LOG_CHECKPOINT_CHECKSUM_1);
+	mach_write_to_4(buf + LOG_CHECKPOINT_CHECKSUM_1, fold);
+	/* Checksum 2 starts at LOG_CHECKPOINT_LSN and ends before its field */
+	fold = ut_fold_binary(buf + LOG_CHECKPOINT_LSN,
+			LOG_CHECKPOINT_CHECKSUM_2 - LOG_CHECKPOINT_LSN);
+	mach_write_to_4(buf + LOG_CHECKPOINT_CHECKSUM_2, fold);
+
+	/* Starting from InnoDB-3.23.50, we should also write info on
+	allocated size in the tablespace, but we do not know it here, so
+	the LOG_CHECKPOINT_FSP_... fields are left as they are in hdr_buf */
+}
+
+/**********************************************************
 Reads a checkpoint info from a log group header to log_sys->checkpoint_buf. */
 
 void
@@ -2800,7 +2944,10 @@ log_check_log_recs(
 
 	ut_memcpy(scan_buf, start, end - start);
 	
-	recv_scan_log_recs(FALSE, scan_buf, end - start,
+	recv_scan_log_recs(TRUE,
+				buf_pool_get_curr_size() -
+				RECV_POOL_N_FREE_BLOCKS * UNIV_PAGE_SIZE,	
+				FALSE, scan_buf, end - start,
 				ut_dulint_align_down(buf_start_lsn,
 						OS_FILE_LOG_BLOCK_SIZE),
 			&contiguous_lsn, &scanned_lsn);
diff --git a/innobase/log/log0recv.c b/innobase/log/log0recv.c
index 29e87c7572b..c31719f7bb0 100644
--- a/innobase/log/log0recv.c
+++ b/innobase/log/log0recv.c
@@ -1,7 +1,7 @@
 /******************************************************
 Recovery
 
-(c) 1997 InnoDB Oy
+(c) 1997 Innobase Oy
 
 Created 9/20/1997 Heikki Tuuri
 *******************************************************/
@@ -33,13 +33,6 @@ Created 9/20/1997 Heikki Tuuri
 #include "dict0boot.h"
 #include "fil0fil.h"
 
-/* Size of block reads when the log groups are scanned forward to do a
-roll-forward */
-#define RECV_SCAN_SIZE		(4 * UNIV_PAGE_SIZE)
-
-/* Size of the parsing buffer */
-#define RECV_PARSING_BUF_SIZE	LOG_BUFFER_SIZE
-
 /* Log records are stored in the hash table in chunks at most of this size;
 this must be less than UNIV_PAGE_SIZE as it is stored in the buffer pool */
 #define RECV_DATA_BLOCK_SIZE	(MEM_MAX_ALLOC_IN_BUF - sizeof(recv_data_t))
@@ -69,6 +62,9 @@ ibool	recv_no_ibuf_operations = FALSE;
 log scan */
 ulint	recv_scan_print_counter	= 0;
 
+ibool	recv_is_from_backup	= FALSE;
+
+
 /************************************************************
 Creates the recovery system. */
 
@@ -94,8 +90,11 @@ recv_sys_create(void)
 Inits the recovery system for a recovery operation. */
 
 void
-recv_sys_init(void)
-/*===============*/
+recv_sys_init(
+/*==========*/
+	ibool	recover_from_backup,	/* in: TRUE if this is called
+					to recover from a hot backup */
+	ulint	available_memory)	/* in: available memory in bytes */
 {
 	if (recv_sys->heap != NULL) {
 
@@ -104,13 +103,18 @@ recv_sys_init(void)
 
 	mutex_enter(&(recv_sys->mutex));
 
-	recv_sys->heap = mem_heap_create_in_buffer(256);
+	if (!recover_from_backup) {
+		recv_sys->heap = mem_heap_create_in_buffer(256);
+	} else {
+		recv_sys->heap = mem_heap_create(256);
+		recv_is_from_backup = TRUE;
+	}
 
 	recv_sys->buf = ut_malloc(RECV_PARSING_BUF_SIZE);
 	recv_sys->len = 0;
 	recv_sys->recovered_offset = 0;
 
-	recv_sys->addr_hash = hash_create(buf_pool_get_curr_size() / 64);
+	recv_sys->addr_hash = hash_create(available_memory / 64);
 	recv_sys->n_addrs = 0;
 	
 	recv_sys->apply_log_recs = FALSE;
@@ -337,7 +341,7 @@ recv_synchronize_groups(
 	start_lsn = ut_dulint_align_down(recovered_lsn, OS_FILE_LOG_BLOCK_SIZE);
 	end_lsn = ut_dulint_align_up(recovered_lsn, OS_FILE_LOG_BLOCK_SIZE);
 
-	ut_ad(ut_dulint_cmp(start_lsn, end_lsn) != 0);
+	ut_a(ut_dulint_cmp(start_lsn, end_lsn) != 0);
 
 	log_group_read_log_seg(LOG_RECOVER, recv_sys->last_block,
 					up_to_date_group, start_lsn, end_lsn);
@@ -377,6 +381,35 @@ recv_synchronize_groups(
 	mutex_enter(&(log_sys->mutex));
 }
 
+/***************************************************************************
+Checks the consistency of the checkpoint info by verifying its two checksums */
+static
+ibool
+recv_check_cp_is_consistent(
+/*========================*/
+			/* out: TRUE if both stored checksums match */
+	byte*	buf)	/* in: buffer containing checkpoint info */
+{
+	ulint	fold;	/* checksum recomputed from the buffer contents */
+	/* Checksum 1 covers the bytes before LOG_CHECKPOINT_CHECKSUM_1 */
+	fold = ut_fold_binary(buf, LOG_CHECKPOINT_CHECKSUM_1);
+
+	if ((fold & 0xFFFFFFFF) != mach_read_from_4(buf
+				+ LOG_CHECKPOINT_CHECKSUM_1)) {	/* 4 bytes stored */
+		return(FALSE);
+	}
+	/* Checksum 2 covers [LOG_CHECKPOINT_LSN, LOG_CHECKPOINT_CHECKSUM_2) */
+	fold = ut_fold_binary(buf + LOG_CHECKPOINT_LSN,
+			LOG_CHECKPOINT_CHECKSUM_2 - LOG_CHECKPOINT_LSN);
+
+	if ((fold & 0xFFFFFFFF) != mach_read_from_4(buf
+					+ LOG_CHECKPOINT_CHECKSUM_2)) {
+		return(FALSE);
+	}
+
+	return(TRUE);
+}
+
 /************************************************************
 Looks for the maximum consistent checkpoint from the log groups. */
 static
@@ -392,7 +425,6 @@ recv_find_max_checkpoint(
 	dulint		max_no;
 	dulint		checkpoint_no;
 	ulint		field;
-	ulint		fold;
 	byte*		buf;
 	
 	group = UT_LIST_GET_FIRST(log_sys->log_groups);
@@ -410,17 +442,11 @@ recv_find_max_checkpoint(
 	
 			log_group_read_checkpoint_info(group, field);
 
-			/* Check the consistency of the checkpoint info */
-			fold = ut_fold_binary(buf, LOG_CHECKPOINT_CHECKSUM_1);
-
-			if ((fold & 0xFFFFFFFF)
-                                  != mach_read_from_4(buf
-						+ LOG_CHECKPOINT_CHECKSUM_1)) {
+			if (!recv_check_cp_is_consistent(buf)) {
 				if (log_debug_writes) {
 					fprintf(stderr, 
-	    "InnoDB: Checkpoint in group %lu at %lu invalid, %lu, %lu\n",
+	    "InnoDB: Checkpoint in group %lu at %lu invalid, %lu\n",
 						group->id, field,
-                                                fold & 0xFFFFFFFF,
                                  mach_read_from_4(buf
 					      + LOG_CHECKPOINT_CHECKSUM_1));
 
@@ -429,23 +455,6 @@ recv_find_max_checkpoint(
 				goto not_consistent;
 			}
 
-			fold = ut_fold_binary(buf + LOG_CHECKPOINT_LSN,
-						LOG_CHECKPOINT_CHECKSUM_2
-							- LOG_CHECKPOINT_LSN);
-			if ((fold & 0xFFFFFFFF)
-                                  != mach_read_from_4(buf
-						+ LOG_CHECKPOINT_CHECKSUM_2)) {
-				if (log_debug_writes) {
-					fprintf(stderr, 
-		"InnoDB: Checkpoint in group %lu at %lu invalid, %lu, %lu\n",
-						group->id, field,
-                                                fold & 0xFFFFFFFF,
-                                 mach_read_from_4(buf
-						  + LOG_CHECKPOINT_CHECKSUM_2));
-				}
-				goto not_consistent;
-			}
-
 			group->state = LOG_GROUP_OK;
 
 			group->lsn = mach_read_from_8(buf
@@ -476,7 +485,13 @@ recv_find_max_checkpoint(
 
 	if (*max_group == NULL) {
 
-		fprintf(stderr, "InnoDB: No valid checkpoint found\n");
+		fprintf(stderr,
+"InnoDB: No valid checkpoint found.\n"
+"InnoDB: If this error appears when you are creating an InnoDB database,\n"
+"InnoDB: the problem may be that during an earlier attempt you managed\n"
+"InnoDB: to create the InnoDB data files, but log file creation failed.\n"
+"InnoDB: If that is the case, please refer to section 3.1 of\n"
+"InnoDB: http://www.innodb.com/ibman.html\n");
 
 		return(DB_ERROR);
 	}
@@ -485,6 +500,162 @@ recv_find_max_checkpoint(
 }
 
 /***********************************************************************
+Reads the checkpoint info needed in hot backup. */
+
+ibool
+recv_read_cp_info_for_backup(
+/*=========================*/
+			/* out: TRUE if a consistent checkpoint was found */
+	byte*	hdr,	/* in: buffer containing the log group header */
+	dulint*	lsn,	/* out: checkpoint lsn */
+	ulint*	offset,	/* out: checkpoint offset in the log group */
+	ulint*	fsp_limit,/* out: fsp free limit; 1000000000 if the stored
+			value is 0 or the fsp magic number is missing */
+	dulint*	cp_no,	/* out: checkpoint number */
+	dulint*	first_header_lsn)
+			/* out: lsn of the start of the first log file */
+{
+	ulint	max_cp		= 0;	/* 0 = no consistent checkpoint yet */
+	dulint	max_cp_no	= ut_dulint_zero;
+	byte*	cp_buf;
+
+	cp_buf = hdr + LOG_CHECKPOINT_1;
+
+	if (recv_check_cp_is_consistent(cp_buf)) {
+		max_cp_no = mach_read_from_8(cp_buf + LOG_CHECKPOINT_NO);
+		max_cp = LOG_CHECKPOINT_1;
+	}
+
+	cp_buf = hdr + LOG_CHECKPOINT_2;
+	/* Slot 2 wins if slot 1 was rejected or slot 2 has a higher number */
+	if (recv_check_cp_is_consistent(cp_buf)) {
+		if (max_cp == 0 || ut_dulint_cmp(mach_read_from_8(
+			cp_buf + LOG_CHECKPOINT_NO), max_cp_no) > 0) {
+			max_cp = LOG_CHECKPOINT_2;
+		}
+	}
+
+	if (max_cp == 0) {
+		return(FALSE);
+	}
+
+	cp_buf = hdr + max_cp;
+	
+	*lsn = mach_read_from_8(cp_buf + LOG_CHECKPOINT_LSN);
+	*offset = mach_read_from_4(cp_buf + LOG_CHECKPOINT_OFFSET);
+
+	/* If the user is running a pre-3.23.50 version of InnoDB, its
+	checkpoint data does not contain the fsp limit info */
+	if (mach_read_from_4(cp_buf + LOG_CHECKPOINT_FSP_MAGIC_N)
+	    == LOG_CHECKPOINT_FSP_MAGIC_N_VAL) {
+	
+		*fsp_limit = mach_read_from_4(
+				cp_buf + LOG_CHECKPOINT_FSP_FREE_LIMIT);
+
+		if (*fsp_limit == 0) {
+			*fsp_limit = 1000000000;
+		}	
+	} else {
+		*fsp_limit = 1000000000;
+	}
+
+/*	printf("fsp limit %lu MB\n", *fsp_limit); */
+
+	*cp_no = mach_read_from_8(cp_buf + LOG_CHECKPOINT_NO);
+
+	*first_header_lsn = mach_read_from_8(hdr + LOG_FILE_START_LSN);
+
+	return(TRUE);
+}
+
+/***********************************************************************
+Scans the log segment and n_bytes_scanned is set to the length of valid
+log scanned. */
+
+void
+recv_scan_log_seg_for_backup(
+/*=========================*/
+	byte*		buf,		/* in: buffer containing log data */
+	ulint		buf_len,	/* in: data length in that buffer */
+	dulint*		scanned_lsn,	/* in/out: lsn of buffer start; advanced
+					by the data length of each accepted block */
+	ulint*		scanned_checkpoint_no,
+					/* in/out: 4 lowest bytes of the
+					checkpoint number of the last accepted
+					block; the stale test is skipped if 0 */
+	ulint*		n_bytes_scanned)/* out: sum of the data lengths of
+					the accepted blocks, smaller than
+					buf_len if log data ended here */
+{
+	ulint	data_len;
+	byte*	log_block;
+	ulint	no;
+
+	*n_bytes_scanned = 0;
+	
+	for (log_block = buf; log_block < buf + buf_len;
+				log_block += OS_FILE_LOG_BLOCK_SIZE) {
+	
+		no = log_block_get_hdr_no(log_block);
+
+		/* fprintf(stderr, "Log block header no %lu\n", no); */
+
+		if (no != log_block_get_trl_no(log_block)
+		    || no != log_block_convert_lsn_to_no(*scanned_lsn)) {
+
+/*			printf(
+"Log block n:o %lu, trailer n:o %lu, scanned lsn n:o %lu\n",
+			no, log_block_get_trl_no(log_block),
+			log_block_convert_lsn_to_no(*scanned_lsn));
+*/
+			/* Garbage or an incompletely written log block */
+
+			log_block += OS_FILE_LOG_BLOCK_SIZE; /* only read by the disabled printf below */
+
+/*			printf(
+"Next log block n:o %lu, trailer n:o %lu\n",
+			log_block_get_hdr_no(log_block),
+			log_block_get_trl_no(log_block));
+*/			
+			break;
+		}
+
+		if (*scanned_checkpoint_no > 0
+		    && log_block_get_checkpoint_no(log_block)
+						< *scanned_checkpoint_no
+		    && *scanned_checkpoint_no
+			- log_block_get_checkpoint_no(log_block)
+							> 0x80000000) {
+
+			/* Garbage from a log buffer flush which was made
+			before the most recent database recovery */
+
+			printf("Scanned cp n:o %lu, block cp n:o %lu\n",
+				*scanned_checkpoint_no,
+				log_block_get_checkpoint_no(log_block));
+
+			break;
+		}
+
+		data_len = log_block_get_data_len(log_block);
+
+		*scanned_checkpoint_no
+				= log_block_get_checkpoint_no(log_block);
+		*scanned_lsn = ut_dulint_add(*scanned_lsn, data_len);
+
+		*n_bytes_scanned += data_len;
+		
+		if (data_len < OS_FILE_LOG_BLOCK_SIZE) {
+			/* A partly filled block: log data ends here */
+
+			/* printf("Log block data len %lu\n", data_len); */
+
+			break;
+		}
+	}
+}
+
+/***********************************************************************
 Tries to parse a single log record body and also applies it to a page if
 specified. */
 static
@@ -625,7 +796,6 @@ recv_get_fil_addr_struct(
 
 	recv_addr = HASH_GET_FIRST(recv_sys->addr_hash,
 						recv_hash(space, page_no));
-	
 	while (recv_addr) {
 		if ((recv_addr->space == space)
 				&& (recv_addr->page_no == page_no)) {
@@ -755,6 +925,10 @@ read in, or also for a page already in the buffer pool. */
 void
 recv_recover_page(
 /*==============*/
+	ibool	recover_backup,	/* in: TRUE if we are recovering a backup
+				page: then we do not acquire any latches
+				since the page was read in outside the
+				buffer pool */
 	ibool	just_read_in,	/* in: TRUE if the i/o-handler calls this for
 				a freshly read page */
 	page_t*	page,		/* in: buffer page */
@@ -799,39 +973,48 @@ recv_recover_page(
 	
 	mutex_exit(&(recv_sys->mutex));
 
-	block = buf_block_align(page);
-
-	if (just_read_in) {
-		/* Move the ownership of the x-latch on the page to this OS
-		thread, so that we can acquire a second x-latch on it. This
-		is needed for the operations to the page to pass the debug
-		checks. */
+	mtr_start(&mtr);
+	mtr_set_log_mode(&mtr, MTR_LOG_NONE);
 
-		rw_lock_x_lock_move_ownership(&(block->lock));
-	}
+	if (!recover_backup) {	
+		block = buf_block_align(page);
 
-	mtr_start(&mtr);
+		if (just_read_in) {
+		  /* Move the ownership of the x-latch on the page to this OS
+		  thread, so that we can acquire a second x-latch on it. This
+		  is needed for the operations to the page to pass the debug
+		  checks. */
 
-	mtr_set_log_mode(&mtr, MTR_LOG_NONE);
+			rw_lock_x_lock_move_ownership(&(block->lock));
+		}
 
-	success = buf_page_get_known_nowait(RW_X_LATCH, page, BUF_KEEP_OLD,
+		success = buf_page_get_known_nowait(RW_X_LATCH, page,
+					BUF_KEEP_OLD,
 					IB__FILE__, __LINE__,
 					&mtr);
-	ut_a(success);
+		ut_a(success);
 
-	buf_page_dbg_add_level(page, SYNC_NO_ORDER_CHECK);
+		buf_page_dbg_add_level(page, SYNC_NO_ORDER_CHECK);
+	}
 
 	/* Read the newest modification lsn from the page */
 	page_lsn = mach_read_from_8(page + FIL_PAGE_LSN);
 
-	/* It may be that the page has been modified in the buffer pool: read
-	the newest modification lsn there */
+	if (!recover_backup) {
+		/* It may be that the page has been modified in the buffer
+		pool: read the newest modification lsn there */
 		
-	page_newest_lsn = buf_frame_get_newest_modification(page);
+		page_newest_lsn = buf_frame_get_newest_modification(page);
 
-	if (!ut_dulint_is_zero(page_newest_lsn)) {
+		if (!ut_dulint_is_zero(page_newest_lsn)) {
 		
-		page_lsn = page_newest_lsn;
+			page_lsn = page_newest_lsn;
+		}
+	} else {
+		/* In recovery from a backup we do not use the buffer
+		pool */
+
+		page_newest_lsn = ut_dulint_zero;
 	}
 
 	modification_to_page = FALSE;
@@ -852,13 +1035,13 @@ recv_recover_page(
 			buf = ((byte*)(recv->data)) + sizeof(recv_data_t);
 		}
 
-		if ((recv->type == MLOG_INIT_FILE_PAGE)
-		    || (recv->type == MLOG_FULL_PAGE)) {
-			/* A new file page may has been taken into use,
+		if (recv->type == MLOG_INIT_FILE_PAGE
+		    || recv->type == MLOG_FULL_PAGE) {
+			/* A new file page may have been taken into use,
 			or we have stored the full contents of the page:
 			in this case it may be that the original log record
 			type was MLOG_INIT_FILE_PAGE, and we replaced it
-			with MLOG_FULL_PAGE, thus to we have to apply
+			with MLOG_FULL_PAGE, thus we have to apply
 			any record of type MLOG_FULL_PAGE */
 			
 			page_lsn = page_newest_lsn;
@@ -885,6 +1068,13 @@ recv_recover_page(
 					
 			recv_parse_or_apply_log_rec_body(recv->type, buf,
 						buf + recv->len, page, &mtr);
+			mach_write_to_8(page + UNIV_PAGE_SIZE
+					- FIL_PAGE_END_LSN,
+					ut_dulint_add(recv->start_lsn,
+							recv->len));
+			mach_write_to_8(page + FIL_PAGE_LSN,
+					ut_dulint_add(recv->start_lsn,
+							recv->len));
 		}
 						
 		if (recv->len > RECV_DATA_BLOCK_SIZE) {
@@ -903,7 +1093,7 @@ recv_recover_page(
 
 	mutex_exit(&(recv_sys->mutex));
 	
-	if (modification_to_page) {
+	if (!recover_backup && modification_to_page) {
 		buf_flush_recv_note_modification(block, start_lsn, end_lsn);
 	}
 	
@@ -1038,8 +1228,8 @@ loop:
 
 					buf_page_dbg_add_level(page,
 							SYNC_NO_ORDER_CHECK);
-					recv_recover_page(FALSE, page, space,
-								page_no);
+					recv_recover_page(FALSE, FALSE, page,
+							space, page_no);
 					mtr_commit(&mtr);
 				} else {
 					recv_read_in_area(space, page_no);
@@ -1112,6 +1302,95 @@ loop:
 }
 
 /***********************************************************************
+Applies log records in the hash table to a backup. */
+
+void
+recv_apply_log_recs_for_backup(
+/*===========================*/
+	ulint	n_data_files,	/* in: number of data files */
+	char**	data_files,	/* in: array containing the paths to the
+				data files */
+	ulint*	file_sizes)	/* in: sizes of the data files in database
+				pages */
+{
+	recv_addr_t*	recv_addr;
+	os_file_t	data_file;	/* opened on the first loop iteration */
+	ulint		n_pages_total	= 0;
+	ulint		nth_file	= 0;
+	ulint		nth_page_in_file= 0;
+	byte*		page;
+	ibool		success;
+	ulint		i;
+
+	recv_sys->apply_log_recs = TRUE;
+	recv_sys->apply_batch_on = TRUE;
+
+	page = buf_pool->frame_zero;
+	
+	for (i = 0; i < n_data_files; i++) {
+		n_pages_total += file_sizes[i];
+	}
+
+	printf( 
+"InnoDB: Starting an apply batch of log records to the database...\n"
+"InnoDB: Progress in percents: ");
+	
+	for (i = 0; i < n_pages_total; i++) {
+
+		if (i == 0 || nth_page_in_file == file_sizes[nth_file]) {
+			if (i != 0) {
+				nth_file++;
+				nth_page_in_file = 0;
+				os_file_flush(data_file);
+				os_file_close(data_file);
+			}
+
+			data_file = os_file_create_simple(data_files[nth_file],
+							OS_FILE_OPEN,
+							OS_FILE_READ_WRITE,
+							&success);
+			ut_a(success);
+		}
+		
+		recv_addr = recv_get_fil_addr_struct(0, i); /* space 0, page i */
+
+		if (recv_addr != NULL) {
+			os_file_read(data_file, page,
+			  (nth_page_in_file << UNIV_PAGE_SIZE_SHIFT)
+				& 0xFFFFFFFF,
+			  nth_page_in_file >> (32 - UNIV_PAGE_SIZE_SHIFT), 
+				UNIV_PAGE_SIZE);
+
+			recv_recover_page(TRUE, FALSE, page, 0, i);
+
+			buf_flush_init_for_writing(page,
+				mach_read_from_8(page + FIL_PAGE_LSN),
+				0, i);
+			success = os_file_write(data_files[nth_file],
+			  data_file, page,
+			  (nth_page_in_file << UNIV_PAGE_SIZE_SHIFT)
+				& 0xFFFFFFFF,
+			  nth_page_in_file >> (32 - UNIV_PAGE_SIZE_SHIFT), 
+				UNIV_PAGE_SIZE);
+			ut_a(success);	/* os_file_write returns FALSE on failure */
+		}
+
+		if ((100 * i) / n_pages_total
+				!= (100 * (i + 1)) / n_pages_total) {
+			printf("%lu ", (100 * i) / n_pages_total);
+			fflush(stdout);
+		}
+
+		nth_page_in_file++;
+	}
+	if (n_pages_total > 0) {	/* otherwise no file was ever opened */
+	os_file_flush(data_file);
+	os_file_close(data_file);
+	}
+	recv_sys_empty_hash();
+}
+
+/***********************************************************************
 In the debug version, updates the replica of a file page, based on a log
 record. */
 static
@@ -1430,12 +1709,13 @@ recv_check_incomplete_log_recs(
 
 /***********************************************************
 Parses log records from a buffer and stores them to a hash table to wait
-merging to file pages. If the hash table becomes too full, applies it
-automatically to file pages. */
-
-void
+merging to file pages. */
+static
+ibool
 recv_parse_log_recs(
 /*================*/
+				/* out: TRUE if the hash table of parsed log
+				records became full */
 	ibool	store_to_hash)	/* in: TRUE if the records should be stored
 				to the hash table; this is set to FALSE if just
 				debug checking is needed */
@@ -1462,7 +1742,7 @@ loop:
 
 	if (ptr == end_ptr) {
 
-		return;
+		return(FALSE);
 	}
 
 	single_rec = (ulint)*ptr & MLOG_SINGLE_REC_FLAG;
@@ -1476,7 +1756,7 @@ loop:
 							&page_no, &body);
 		if (len == 0) {
 
-			return;
+			return(FALSE);
 		}
 
 		new_recovered_lsn = recv_calc_lsn_on_data_add(old_lsn, len);
@@ -1487,7 +1767,7 @@ loop:
 			that also the next log block should have been scanned
 			in */
 
-			return;
+			return(FALSE);
 		}
 		
 		recv_sys->recovered_offset += len;
@@ -1529,7 +1809,7 @@ loop:
 							&page_no, &body);
 			if (len == 0) {
 
-				return;
+				return(FALSE);
 			}
 
 			if ((!store_to_hash) && (type != MLOG_MULTI_REC_END)) {
@@ -1570,27 +1850,9 @@ loop:
 			that also the next log block should have been scanned
 			in */
 
-			return;
-		}
-
-		if (2 * n_recs * (sizeof(recv_t) + sizeof(recv_addr_t))
-			+ total_len
-			+ mem_heap_get_size(recv_sys->heap)
-	    		+ RECV_POOL_N_FREE_BLOCKS * UNIV_PAGE_SIZE
-					> buf_pool_get_curr_size()) {
-
-			/* Hash table of log records will grow too big:
-			empty it */
-					
-			recv_apply_hashed_log_recs(FALSE);
+			return(FALSE);
 		}
 
-		ut_ad(2 * n_recs * (sizeof(recv_t) + sizeof(recv_addr_t))
-			+ total_len
-			+ mem_heap_get_size(recv_sys->heap)
-	    		+ RECV_POOL_N_FREE_BLOCKS * UNIV_PAGE_SIZE
-					< buf_pool_get_curr_size());
-
 		/* Add all the records to the hash table */
 
 		ptr = recv_sys->buf + recv_sys->recovered_offset;
@@ -1627,18 +1889,7 @@ loop:
 			ptr += len;
 		}
 	}
-
-	if (store_to_hash && buf_get_free_list_len()
-					< RECV_POOL_N_FREE_BLOCKS) {
-
-		/* Hash table of log records has grown too big: empty it;
-		FALSE means no ibuf operations allowed, as we cannot add
-		new records to the log yet: they would be produced by ibuf
-		operations */
-
-		recv_apply_hashed_log_recs(FALSE);
-	}	    
-
+   
 	goto loop;
 }
 
@@ -1713,7 +1964,7 @@ recv_sys_add_to_parsing_buf(
 
 		recv_sys->len += end_offset - start_offset;
 
-		ut_ad(recv_sys->len <= RECV_PARSING_BUF_SIZE);
+		ut_a(recv_sys->len <= RECV_PARSING_BUF_SIZE);
 	}
 
 	return(TRUE);
@@ -1743,6 +1994,13 @@ recv_scan_log_recs(
 /*===============*/
 				/* out: TRUE if limit_lsn has been reached, or
 				not able to scan any more in this log group */
+	ibool	apply_automatically,/* in: TRUE if we want this function to
+				apply log records automatically when the
+				hash table becomes full; in the hot backup tool
+				the tool does the applying, not this
+				function */
+	ulint	available_memory,/* in: we let the hash table of recs to grow
+				to this size, at the maximum */
 	ibool	store_to_hash,	/* in: TRUE if the records should be stored
 				to the hash table; this is set to FALSE if just
 				debug checking is needed */
@@ -1764,7 +2022,9 @@ recv_scan_log_recs(
 	ut_ad(ut_dulint_get_low(start_lsn) % OS_FILE_LOG_BLOCK_SIZE == 0);
 	ut_ad(len % OS_FILE_LOG_BLOCK_SIZE == 0);
 	ut_ad(len > 0);
-
+	ut_a(apply_automatically <= TRUE);
+	ut_a(store_to_hash <= TRUE);
+	
 	finished = FALSE;
 	
 	log_block = buf;
@@ -1845,6 +2105,13 @@ recv_scan_log_recs(
 			/* We were able to find more log data: add it to the
 			parsing buffer if parse_start_lsn is already non-zero */
 
+			if (recv_sys->len + 4 * OS_FILE_LOG_BLOCK_SIZE
+						>= RECV_PARSING_BUF_SIZE) {
+				fprintf(stderr,
+"InnoDB: Error: log parsing buffer overflow. Recovery may have failed!\n");
+				finished = TRUE;
+			}
+
 			more_data = recv_sys_add_to_parsing_buf(log_block,
 								scanned_lsn);
 			recv_sys->scanned_lsn = scanned_lsn;
@@ -1863,25 +2130,36 @@ recv_scan_log_recs(
 
 	*group_scanned_lsn = scanned_lsn;
 
-	if (more_data) {
+	if (recv_needed_recovery || recv_is_from_backup) {
 		recv_scan_print_counter++;
 
-		if (recv_scan_print_counter < 10
-		    || (recv_scan_print_counter % 10 == 0)) {
+		if (finished || (recv_scan_print_counter % 80 == 0)) {
+
 			fprintf(stderr, 
 "InnoDB: Doing recovery: scanned up to log sequence number %lu %lu\n",
 				ut_dulint_get_high(*group_scanned_lsn),
 				ut_dulint_get_low(*group_scanned_lsn));
-			if (recv_scan_print_counter == 10) {
-				fprintf(stderr,
-"InnoDB: After this prints a line for every 10th scan sweep:\n");
-			}
 		}
+	}
 
+	if (more_data) {
 		/* Try to parse more log records */
 
 		recv_parse_log_recs(store_to_hash);
 
+		if (store_to_hash && mem_heap_get_size(recv_sys->heap)
+						> available_memory
+		    && apply_automatically) {
+						
+			/* Hash table of log records has grown too big:
+			empty it; FALSE means no ibuf operations
+			allowed, as we cannot add new records to the
+			log yet: they would be produced by ibuf
+			operations */
+		
+			recv_apply_hashed_log_recs(FALSE);
+		} 
+
 		if (recv_sys->recovered_offset > RECV_PARSING_BUF_SIZE / 4) {
 			/* Move parsing buffer data to the buffer start */
 
@@ -1918,10 +2196,12 @@ recv_group_scan_log_recs(
 		log_group_read_log_seg(LOG_RECOVER, log_sys->buf,
 						group, start_lsn, end_lsn);
 
-		finished = recv_scan_log_recs(TRUE, log_sys->buf,
-						RECV_SCAN_SIZE, start_lsn,
-						contiguous_lsn,
-						group_scanned_lsn);
+		finished = recv_scan_log_recs(TRUE,
+				buf_pool_get_curr_size()
+				- RECV_POOL_N_FREE_BLOCKS * UNIV_PAGE_SIZE,
+				TRUE, log_sys->buf,
+				RECV_SCAN_SIZE, start_lsn,
+				contiguous_lsn, group_scanned_lsn);
 		start_lsn = end_lsn;
 	}
 
@@ -1969,7 +2249,7 @@ recv_recovery_from_checkpoint_start(
 	if (type == LOG_CHECKPOINT) {
 
 		recv_sys_create();
-		recv_sys_init();
+		recv_sys_init(FALSE, buf_pool_get_curr_size());
 	}
 
 	if (srv_force_recovery >= SRV_FORCE_NO_LOG_REDO) {
@@ -2281,6 +2561,84 @@ recv_reset_logs(
 }
 
 /**********************************************************
+Creates new log files after a backup has been restored. */
+
+void
+recv_reset_log_files_for_backup(
+/*============================*/
+	char*	log_dir,	/* in: log file directory path, used as a prefix as is */
+	ulint	n_log_files,	/* in: number of log files */
+	ulint	log_file_size,	/* in: log file size */
+	dulint	lsn)		/* in: new start lsn, must be divisible by
+				OS_FILE_LOG_BLOCK_SIZE */
+{
+	os_file_t	log_file;
+	ibool		success;
+	byte*		buf;
+	ulint		i;
+	char		name[5000];	/* NOTE: the sprintf calls below are unbounded */
+	
+	buf = ut_malloc(LOG_FILE_HDR_SIZE + OS_FILE_LOG_BLOCK_SIZE);
+	
+	for (i = 0; i < n_log_files; i++) {
+
+		sprintf(name, "%sib_logfile%lu", log_dir, i);
+
+		log_file = os_file_create_simple(name, OS_FILE_CREATE,
+						OS_FILE_READ_WRITE, &success);
+		if (!success) {
+			printf(
+"InnoDB: Cannot create %s. Check that the file does not exist yet.\n", name);
+
+			exit(1);
+		}
+
+		printf(
+"Setting log file size to %lu %lu\n", ut_get_high32(log_file_size),
+						log_file_size & 0xFFFFFFFF);
+
+		success = os_file_set_size(name, log_file,
+					log_file_size & 0xFFFFFFFF,
+					ut_get_high32(log_file_size));
+
+		if (!success) {
+			printf(
+"InnoDB: Cannot set %s size to %lu %lu\n", name, ut_get_high32(log_file_size),
+						log_file_size & 0xFFFFFFFF);
+			exit(1);
+		}
+
+		os_file_flush(log_file);
+		os_file_close(log_file);
+	}
+
+	/* We pretend there is a checkpoint at lsn + LOG_BLOCK_HDR_SIZE */
+	
+	log_reset_first_header_and_checkpoint(buf,
+				ut_dulint_add(lsn, LOG_BLOCK_HDR_SIZE));
+	
+	log_block_init(buf + LOG_FILE_HDR_SIZE, lsn);
+	log_block_set_first_rec_group(buf + LOG_FILE_HDR_SIZE,
+							LOG_BLOCK_HDR_SIZE);
+	sprintf(name, "%sib_logfile%lu", log_dir, (ulint) 0); /* %lu needs a ulint, not an int */
+
+	log_file = os_file_create_simple(name, OS_FILE_OPEN,
+						OS_FILE_READ_WRITE, &success);
+	if (!success) {
+		printf("InnoDB: Cannot open %s.\n", name);
+
+		exit(1);
+	}
+
+	success = os_file_write(name, log_file, buf, 0, 0,
+				LOG_FILE_HDR_SIZE + OS_FILE_LOG_BLOCK_SIZE);
+	ut_a(success);	/* os_file_write returns FALSE on failure */
+	os_file_flush(log_file);
+	os_file_close(log_file);
+
+	ut_free(buf);
+}
+
+/**********************************************************
 Reads from the archive of a log group and performs recovery. */
 static
 ibool
@@ -2296,13 +2654,13 @@ log_group_recover_from_archive_file(
 	dulint	dummy_lsn;
 	dulint	scanned_lsn;
 	ulint	len;
-	char	name[10000];
 	ibool	ret;
 	byte*	buf;
 	ulint	read_offset;
 	ulint	file_size;
 	ulint	file_size_high;
 	int	input_char;
+	char	name[10000];
 
 try_open_again:	
 	buf = log_sys->buf;
@@ -2438,9 +2796,11 @@ ask_again:
 			group->archive_space_id, read_offset / UNIV_PAGE_SIZE,
 			read_offset % UNIV_PAGE_SIZE, len, buf, NULL);
 
-		
-		ret = recv_scan_log_recs(TRUE, buf, len, start_lsn,
-						&dummy_lsn, &scanned_lsn);
+		ret = recv_scan_log_recs(TRUE,
+				buf_pool_get_curr_size() -
+				RECV_POOL_N_FREE_BLOCKS * UNIV_PAGE_SIZE,
+				TRUE, buf, len, start_lsn,
+				&dummy_lsn, &scanned_lsn);
 
 		if (ut_dulint_cmp(scanned_lsn, file_end_lsn) == 0) {
 
@@ -2485,7 +2845,7 @@ recv_recovery_from_archive_start(
 	ulint		err;
 	
 	recv_sys_create();
-	recv_sys_init();
+	recv_sys_init(FALSE, buf_pool_get_curr_size());
 
 	sync_order_checks_on = TRUE;
 	
diff --git a/innobase/mem/mem0mem.c b/innobase/mem/mem0mem.c
index 0680968a7eb..94cf85dfd63 100644
--- a/innobase/mem/mem0mem.c
+++ b/innobase/mem/mem0mem.c
@@ -234,7 +234,8 @@ mem_heap_add_block(
 	new_size = 2 * mem_block_get_len(block);
 
 	if (heap->type != MEM_HEAP_DYNAMIC) {
-		ut_ad(n <= MEM_MAX_ALLOC_IN_BUF);
+		/* From the buffer pool we allocate buffer frames */
+		ut_a(n <= MEM_MAX_ALLOC_IN_BUF);
 
 		if (new_size > MEM_MAX_ALLOC_IN_BUF) {
 			new_size = MEM_MAX_ALLOC_IN_BUF;
@@ -249,7 +250,7 @@ mem_heap_add_block(
 	}
 	
 	new_block = mem_heap_create_block(heap, new_size, NULL, heap->type,
-					heap->file_name, heap->line);
+						heap->file_name, heap->line);
 	if (new_block == NULL) {
 
 		return(NULL);
diff --git a/innobase/os/os0file.c b/innobase/os/os0file.c
index 010716d8d17..ee4045febde 100644
--- a/innobase/os/os0file.c
+++ b/innobase/os/os0file.c
@@ -10,17 +10,22 @@ Created 10/21/1995 Heikki Tuuri
 #include "os0sync.h"
 #include "ut0mem.h"
 #include "srv0srv.h"
-#include "trx0sys.h"
 #include "fil0fil.h"
 
 #undef HAVE_FDATASYNC
 
+#undef UNIV_NON_BUFFERED_IO
+
 #ifdef POSIX_ASYNC_IO
 /* We assume in this case that the OS has standard Posix aio (at least SunOS
 2.6, HP-UX 11i and AIX 4.3 have) */
 
 #endif
 
+/* If the following is set to TRUE, we do not call os_file_flush in every
+os_file_write. We can set this TRUE if the doublewrite buffer is used. */
+ibool	os_do_not_call_flush_at_each_write	= FALSE;
+
 /* We use these mutexes to protect lseek + file i/o operation, if the
 OS does not provide an atomic pread or pwrite, or similar */
 #define OS_FILE_N_SEEK_MUTEXES	16
@@ -118,6 +123,9 @@ ulint	os_n_file_writes_old	= 0;
 ulint	os_n_fsyncs_old		= 0;
 time_t	os_last_printout;
 
+ibool	os_has_said_disk_full	= FALSE;
+
+
 /***************************************************************************
 Gets the operating system version. Currently works only on Windows. */
 
@@ -167,27 +175,28 @@ os_file_get_last_error(void)
 
 	err = (ulint) GetLastError();
 
-	if (err != ERROR_FILE_EXISTS) {
-	         fprintf(stderr,
-  "InnoDB: Operating system error number %li in a file operation.\n"
+	if (err != ERROR_FILE_EXISTS && err != ERROR_DISK_FULL) {
+		ut_print_timestamp(stderr);
+	     	fprintf(stderr,
+  "  InnoDB: Operating system error number %li in a file operation.\n"
   "InnoDB: See http://www.innodb.com/ibman.html for installation help.\n",
 		(long) err);
 
-		 if (err == ERROR_PATH_NOT_FOUND) {
+		if (err == ERROR_PATH_NOT_FOUND) {
 		         fprintf(stderr,
   "InnoDB: The error means the system cannot find the path specified.\n"
   "InnoDB: In installation you must create directories yourself, InnoDB\n"
   "InnoDB: does not create them.\n");
-		 } else if (err == ERROR_ACCESS_DENIED) {
+		} else if (err == ERROR_ACCESS_DENIED) {
 		         fprintf(stderr,
   "InnoDB: The error means mysqld does not have the access rights to\n"
   "InnoDB: the directory. It may also be you have created a subdirectory\n"
   "InnoDB: of the same name as a data file.\n"); 
-		 } else {
+		} else {
 		         fprintf(stderr,
   "InnoDB: Look from section 13.2 at http://www.innodb.com/ibman.html\n"
   "InnoDB: what the error number means.\n");
-		 }
+		}
 	}
 
 	if (err == ERROR_FILE_NOT_FOUND) {
@@ -202,26 +211,28 @@ os_file_get_last_error(void)
 #else
 	err = (ulint) errno;
 
-	if (err != EEXIST) {
-	         fprintf(stderr,
-  "InnoDB: Operating system error number %li in a file operation.\n"
+	if (err != EEXIST && err != ENOSPC ) {
+		ut_print_timestamp(stderr);
+
+	     	fprintf(stderr,
+  "  InnoDB: Operating system error number %li in a file operation.\n"
   "InnoDB: See http://www.innodb.com/ibman.html for installation help.\n",
 		(long) err);
 
-		 if (err == ENOENT) {
+		if (err == ENOENT) {
 		         fprintf(stderr,
   "InnoDB: The error means the system cannot find the path specified.\n"
   "InnoDB: In installation you must create directories yourself, InnoDB\n"
   "InnoDB: does not create them.\n");
-		 } else if (err == EACCES) {
+		} else if (err == EACCES) {
 		         fprintf(stderr,
   "InnoDB: The error means mysqld does not have the access rights to\n"
   "InnoDB: the directory.\n");
-		 } else {
+		} else {
 		         fprintf(stderr,
   "InnoDB: Look from section 13.2 at http://www.innodb.com/ibman.html\n"
   "InnoDB: what the error number means or use the perror program of MySQL.\n");
-		 }
+		}
 	}
 
 	if (err == ENOSPC ) {
@@ -259,18 +270,26 @@ os_file_handle_error(
 	err = os_file_get_last_error();
 	
 	if (err == OS_FILE_DISK_FULL) {
-		fprintf(stderr, "\n");
+		/* We only print a warning about disk full once */
+
+		if (os_has_said_disk_full) {
+
+			return(FALSE);
+		}
+	
 		if (name) {
-		        fprintf(stderr,
-			  "InnoDB: Encountered a problem with file %s.\n",
-									name);
+			ut_print_timestamp(stderr);
+			fprintf(stderr,
+	"  InnoDB: Encountered a problem with file %s\n", name);
 		}
+
+		ut_print_timestamp(stderr);
 	        fprintf(stderr,
-	   "InnoDB: Cannot continue operation.\n"
-	   "InnoDB: Disk is full. Try to clean the disk to free space.\n"
-	   "InnoDB: Delete a possible created file and restart.\n");
+	"  InnoDB: Disk is full. Try to clean the disk to free space.\n");
 
-		exit(1);
+		os_has_said_disk_full = TRUE;
+
+		return(FALSE);
 
 	} else if (err == OS_FILE_AIO_RESOURCES_RESERVED) {
 		return(TRUE);
@@ -291,6 +310,130 @@ os_file_handle_error(
 }
 
 /********************************************************************
+Creates the seek mutexes used in positioned reads and writes. */
+
+void
+os_io_init_simple(void)
+/*===================*/
+{
+	ulint	i;	/* index into os_file_seek_mutexes[] */
+
+	for (i = 0; i < OS_FILE_N_SEEK_MUTEXES; i++) {
+		os_file_seek_mutexes[i] = os_mutex_create(NULL);
+	}
+}
+
+/********************************************************************
+A simple function to open or create a file. */
+
+os_file_t
+os_file_create_simple(
+/*==================*/
+			/* out, own: handle to the file, not defined if error,
+			error number can be retrieved with os_file_get_last_error */
+	char*	name,	/* in: name of the file or path as a null-terminated
+			string */
+	ulint	create_mode,/* in: OS_FILE_OPEN if an existing file is opened
+			(if does not exist, error), or OS_FILE_CREATE if a new
+			file is created (if exists, error) */
+	ulint	access_type,/* in: OS_FILE_READ_ONLY or OS_FILE_READ_WRITE */
+	ibool*	success)/* out: TRUE if succeed, FALSE if error */
+{
+#ifdef __WIN__
+	os_file_t	file;
+	DWORD		create_flag;
+	DWORD		access;
+	DWORD		attributes	= 0;
+	ibool		retry;
+	
+try_again:	
+	ut_a(name);
+
+	if (create_mode == OS_FILE_OPEN) {
+		create_flag = OPEN_EXISTING;
+	} else if (create_mode == OS_FILE_CREATE) {
+		create_flag = CREATE_NEW;
+	} else {
+		create_flag = 0;
+		ut_error;
+	}
+
+	if (access_type == OS_FILE_READ_ONLY) {
+		access = GENERIC_READ;
+	} else if (access_type == OS_FILE_READ_WRITE) {
+		access = GENERIC_READ | GENERIC_WRITE;
+	} else {
+		access = 0;
+		ut_error;
+	}
+
+	file = CreateFile(name,
+			access,
+			FILE_SHARE_READ | FILE_SHARE_WRITE,
+					/* file can be read and written
+					also by other processes */
+			NULL,	/* default security attributes */
+			create_flag,
+			attributes,
+			NULL);	/* no template file */
+
+	if (file == INVALID_HANDLE_VALUE) {
+		*success = FALSE;
+
+		retry = os_file_handle_error(file, name); /* TRUE: try the call again */
+
+		if (retry) {
+			goto try_again;
+		}
+	} else {
+		*success = TRUE;
+	}
+
+	return(file);
+#else
+	os_file_t	file;
+	int		create_flag;
+	ibool		retry;
+	
+try_again:	
+	ut_a(name);
+
+	if (create_mode == OS_FILE_OPEN) {
+		if (access_type == OS_FILE_READ_ONLY) {
+			create_flag = O_RDONLY;
+		} else {
+			create_flag = O_RDWR; /* any other access_type opens read-write here */
+		}
+	} else if (create_mode == OS_FILE_CREATE) {
+		create_flag = O_RDWR | O_CREAT | O_EXCL; /* access_type is not consulted */
+	} else {
+		create_flag = 0;
+		ut_error;
+	}
+
+	if (create_mode == OS_FILE_CREATE) {
+	        file = open(name, create_flag, S_IRUSR | S_IWUSR | S_IRGRP
+			                     | S_IWGRP | S_IROTH | S_IWOTH);
+        } else {
+                file = open(name, create_flag);
+        }
+	
+	if (file == -1) {
+		*success = FALSE;
+
+		retry = os_file_handle_error(file, name); /* TRUE: try the call again */
+
+		if (retry) {
+			goto try_again;
+		}
+	} else {
+		*success = TRUE;
+	}
+
+	return(file);	
+#endif
+}
+/********************************************************************
 Opens an existing file or creates a new. */
 
 os_file_t
@@ -355,8 +498,9 @@ try_again:
 	file = CreateFile(name,
 			GENERIC_READ | GENERIC_WRITE, /* read and write
 							access */
-			FILE_SHARE_READ,/* file can be read by other
-					processes */
+			FILE_SHARE_READ | FILE_SHARE_WRITE,
+					/* file can be read and written
+					also by other processes */
 			NULL,	/* default security attributes */
 			create_flag,
 			attributes,
@@ -494,6 +638,11 @@ os_file_get_size(
 
 	offs = lseek(file, 0, SEEK_END);
 
+	if (offs == ((off_t)-1)) {
+
+		return(FALSE);
+	}
+	
 #if SIZEOF_OFF_T > 4
 	*size = (ulint)(offs & 0xFFFFFFFF);
 	*size_high = (ulint)(offs >> 32);
@@ -523,13 +672,11 @@ os_file_set_size(
 	ib_longlong	low;
 	ulint   	n_bytes;
 	ibool		ret;
-	ibool		retry;
 	byte*   	buf;
 	ulint   	i;
 
 	ut_a(size == (size & 0xFFFFFFFF));
 
-try_again:
 	/* We use a very big 8 MB buffer in writing because Linux may be
 	extremely slow in fsync on 1 MB writes */
 
@@ -570,14 +717,6 @@ try_again:
 	}
 
 error_handling:
-	retry = os_file_handle_error(file, name); 
-
-	if (retry) {
-		goto try_again;
-	}
-	
-	ut_error;
-
 	return(FALSE);
 }
 
@@ -722,8 +861,7 @@ os_file_pwrite(
 	64-bit address */
 
         if (sizeof(off_t) > 4) {
-	  offs = (off_t)offset + (((off_t)offset_high) << 32);
-        				
+	  	offs = (off_t)offset + (((off_t)offset_high) << 32);
         } else {
         	offs = (off_t)offset;
 
@@ -740,8 +878,8 @@ os_file_pwrite(
 
 	if (srv_unix_file_flush_method != SRV_UNIX_LITTLESYNC
 	    && srv_unix_file_flush_method != SRV_UNIX_NOSYNC
-	    && !trx_doublewrite) {
-
+	    && !os_do_not_call_flush_at_each_write) {
+	    	
 	        /* Always do fsync to reduce the probability that when
                 the OS crashes, a database page is only partially
                 physically written to disk. */
@@ -771,7 +909,7 @@ os_file_pwrite(
 
 	if (srv_unix_file_flush_method != SRV_UNIX_LITTLESYNC
 	    && srv_unix_file_flush_method != SRV_UNIX_NOSYNC
-	    && !trx_doublewrite) {
+	    && !os_do_not_call_flush_at_each_write) {
 
 	        /* Always do fsync to reduce the probability that when
                 the OS crashes, a database page is only partially
@@ -896,13 +1034,12 @@ os_file_write(
 	DWORD		ret2;
 	DWORD		low;
 	DWORD		high;
-	ibool		retry;
 	ulint		i;
 
 	ut_a((offset & 0xFFFFFFFF) == offset);
 
 	os_n_file_writes++;
-try_again:	
+
 	ut_ad(file);
 	ut_ad(buf);
 	ut_ad(n > 0);
@@ -921,7 +1058,15 @@ try_again:
 
 		os_mutex_exit(os_file_seek_mutexes[i]);
 		
-		goto error_handling;
+		ut_print_timestamp(stderr);
+
+		fprintf(stderr,
+"  InnoDB: Error: File pointer positioning to file %s failed at\n"
+"InnoDB: offset %lu %lu. Operating system error number %lu.\n",
+			name, offset_high, offset,
+			(ulint)GetLastError());
+
+		return(FALSE);
 	} 
 
 	ret = WriteFile(file, buf, n, &len, NULL);
@@ -929,38 +1074,61 @@ try_again:
 	/* Always do fsync to reduce the probability that when the OS crashes,
 	a database page is only partially physically written to disk. */
 
-	if (!trx_doublewrite) {
+	if (!os_do_not_call_flush_at_each_write) {
 		ut_a(TRUE == os_file_flush(file));
 	}
 
 	os_mutex_exit(os_file_seek_mutexes[i]);
 
 	if (ret && len == n) {
+
 		return(TRUE);
 	}
+
+	if (!os_has_said_disk_full) {
+	
+		ut_print_timestamp(stderr);
+
+		fprintf(stderr,
+"  InnoDB: Error: Write to file %s failed at offset %lu %lu.\n"
+"InnoDB: %lu bytes should have been written, only %lu were written.\n"
+"InnoDB: Operating system error number %lu.\n"
+"InnoDB: Check that your OS and file system support files of this size.\n"
+"InnoDB: Check also the disk is not full or a disk quota exceeded.\n",
+			name, offset_high, offset, n, len,
+			(ulint)GetLastError());
+
+		os_has_said_disk_full = TRUE;
+	}
+
+	return(FALSE);
 #else
-	ibool	retry;
 	ssize_t	ret;
 	
-try_again:
 	ret = os_file_pwrite(file, buf, n, offset, offset_high);
 
 	if ((ulint)ret == n) {
+
 		return(TRUE);
 	}
-#endif
-#ifdef __WIN__
-error_handling:		
-#endif
-	retry = os_file_handle_error(file, name); 
 
-	if (retry) {
-		goto try_again;
-	}
+	if (!os_has_said_disk_full) {
 	
-	ut_error;
+		ut_print_timestamp(stderr);
 
-	return(FALSE);
+		fprintf(stderr,
+"  InnoDB: Error: Write to file %s failed at offset %lu %lu.\n"
+"InnoDB: %lu bytes should have been written, only %lu were written.\n"
+"InnoDB: Operating system error number %lu.\n"
+"InnoDB: Check that your OS and file system support files of this size.\n"
+"InnoDB: Check also the disk is not full or a disk quota exceeded.\n",
+			name, offset_high, offset, n, ret, (ulint)errno);
+
+		os_has_said_disk_full = TRUE;
+	}
+
+	return(FALSE);	
+#endif
 }
 
 /********************************************************************
@@ -1031,7 +1199,8 @@ os_aio_array_create(
 }
 
 /****************************************************************************
-Initializes the asynchronous io system. Creates separate aio array for
+Initializes the asynchronous io system. Calls also os_io_init_simple.
+Creates a separate aio array for
 non-ibuf read and write, a third aio array for the ibuf i/o, with just one
 segment, two aio arrays for log reads and writes with one segment, and a
 synchronous aio array of the specified size. The combined number of segments
@@ -1058,6 +1227,8 @@ os_aio_init(
 	ut_ad(n % n_segments == 0);
 	ut_ad(n_segments >= 4);
 
+	os_io_init_simple();
+
 	n_per_seg = n / n_segments;
 	n_write_segs = (n_segments - 2) / 2;
 	n_read_segs = n_segments - 2 - n_write_segs;
@@ -1078,10 +1249,6 @@ os_aio_init(
 
 	os_aio_validate();
 
-	for (i = 0; i < OS_FILE_N_SEEK_MUTEXES; i++) {
-		os_file_seek_mutexes[i] = os_mutex_create(NULL);
-	}
-
 	os_aio_segment_wait_events = ut_malloc(n_segments * sizeof(void*));
 
 	for (i = 0; i < n_segments; i++) {
@@ -1739,7 +1906,8 @@ os_aio_windows_handle(
 	if (ret && len == slot->len) {
 		ret_val = TRUE;
 
-		if (slot->type == OS_FILE_WRITE && !trx_doublewrite) {
+		if (slot->type == OS_FILE_WRITE
+				&& !os_do_not_call_flush_at_each_write) {
 		         ut_a(TRUE == os_file_flush(slot->file));
 		}
 	} else {
@@ -1824,7 +1992,8 @@ os_aio_posix_handle(
 	*message1 = slot->message1;
 	*message2 = slot->message2;
 
-	if (slot->type == OS_FILE_WRITE && !trx_doublewrite) {
+	if (slot->type == OS_FILE_WRITE
+				&& !os_do_not_call_flush_at_each_write) {
 		ut_a(TRUE == os_file_flush(slot->file));
 	}
 
diff --git a/innobase/pars/pars0opt.c b/innobase/pars/pars0opt.c
index 3c378ec8ba2..91083e6fa16 100644
--- a/innobase/pars/pars0opt.c
+++ b/innobase/pars/pars0opt.c
@@ -527,7 +527,8 @@ opt_search_plan_for_table(
 	dict_index_t*	best_index;
 	ulint		n_fields;
 	ulint		goodness;
-	ulint		last_op;
+	ulint		last_op		= 75946965;	/* Eliminate a Purify
+							warning */
 	ulint		best_goodness;
 	ulint		best_last_op = 0; /* remove warning */
 	ulint		mix_id_pos;
diff --git a/innobase/que/que0que.c b/innobase/que/que0que.c
index 1cee316f32c..7fa444f6741 100644
--- a/innobase/que/que0que.c
+++ b/innobase/que/que0que.c
@@ -555,6 +555,12 @@ que_graph_free_recursive(
 			btr_pcur_free_for_mysql(upd->pcur);
 		}
 
+		que_graph_free_recursive(upd->cascade_node);		
+
+		if (upd->cascade_heap) {
+			mem_heap_free(upd->cascade_heap);
+		}
+		
 		que_graph_free_recursive(upd->select);
 
 		mem_heap_free(upd->heap);
@@ -1110,9 +1116,6 @@ que_thr_move_to_run_state_for_mysql(
 		trx->n_active_thrs++;
 
 		thr->is_active = TRUE;
-
-		ut_ad((thr->graph)->n_active_thrs == 1);
-		ut_ad(trx->n_active_thrs == 1);
 	}
 	
 	thr->state = QUE_THR_RUNNING;
diff --git a/innobase/rem/rem0cmp.c b/innobase/rem/rem0cmp.c
index 9a5a0eb6e8e..c27af604d04 100644
--- a/innobase/rem/rem0cmp.c
+++ b/innobase/rem/rem0cmp.c
@@ -55,7 +55,8 @@ cmp_debug_dtuple_rec_with_match(
 				contains the value for current comparison */
 /*****************************************************************
 This function is used to compare two data fields for which the data type
-is such that we must use MySQL code to compare them. */
+is such that we must use MySQL code to compare them. The prototype here
+must be a copy of the one in ha_innobase.cc! */
 
 int
 innobase_mysql_cmp(
diff --git a/innobase/row/row0ins.c b/innobase/row/row0ins.c
index c3f912d5f61..ed4df08fcf3 100644
--- a/innobase/row/row0ins.c
+++ b/innobase/row/row0ins.c
@@ -356,6 +356,227 @@ row_ins_dupl_error_with_rec(
 }	
 
 /*************************************************************************
+Either deletes or sets the referencing columns SQL NULL in a child row.
+Used in ON DELETE ... clause for foreign keys when a parent row is
+deleted. Commits and restarts mtr and repositions pcur before returning. */
+static
+ulint
+row_ins_foreign_delete_or_set_null(
+/*===============================*/
+					/* out: DB_SUCCESS, DB_LOCK_WAIT,
+					or error code */
+	que_thr_t*	thr,		/* in: query thread whose run_node
+					is an update node */
+	dict_foreign_t*	foreign,	/* in: foreign key constraint whose
+					type is != 0 */
+	btr_pcur_t*	pcur,		/* in: cursor placed on a matching
+					index record in the child table */
+	mtr_t*		mtr)		/* in: mtr holding the latch of pcur
+					page */
+{
+	upd_node_t*	node;
+	upd_node_t*	cascade;
+	dict_table_t*	table		= foreign->foreign_table;
+	dict_index_t*	index;
+	dict_index_t*	clust_index;
+	dtuple_t*	ref;
+	mem_heap_t*	tmp_heap;
+	rec_t*		rec;
+	rec_t*		clust_rec;
+	upd_t*		update;
+	ulint		err;
+	ulint		i;
+	char		err_buf[1000];	/* for printing records in messages */
+	
+	ut_a(thr && foreign && pcur && mtr);
+
+	node = thr->run_node;
+
+	if (node->cascade_node == NULL) {
+		/* Extend our query graph by creating a child to current
+		update node. The child is used in the cascade or set null
+		operation. */
+
+		node->cascade_heap = mem_heap_create(128);
+		node->cascade_node = row_create_update_node_for_mysql(
+						table, node->cascade_heap);
+		que_node_set_parent(node->cascade_node, node);
+	}
+
+	/* Initialize cascade_node to do the operation we want. Note that we
+	use the SAME cascade node to do all foreign key operations of the
+	SQL DELETE: the table of the cascade node may change if there are
+	several child tables to the table where the delete is done! */
+
+	cascade = node->cascade_node;
+	
+	cascade->table = table;
+
+	if (foreign->type == DICT_FOREIGN_ON_DELETE_CASCADE ) {
+		cascade->is_delete = TRUE;
+	} else {
+		cascade->is_delete = FALSE;
+
+		if (foreign->n_fields > cascade->update_n_fields) {
+			/* We have to make the update vector longer */
+
+			cascade->update = upd_create(foreign->n_fields,
+							node->cascade_heap);
+			cascade->update_n_fields = foreign->n_fields;
+		}
+	}
+
+	index = btr_pcur_get_btr_cur(pcur)->index;
+
+	rec = btr_pcur_get_rec(pcur);
+
+	if (index->type & DICT_CLUSTERED) {
+		/* pcur is already positioned in the clustered index of
+		the child table */
+	
+		clust_index = index;
+		clust_rec = rec;
+	} else {
+		/* We have to look for the record in the clustered index
+		in the child table */
+
+		clust_index = dict_table_get_first_index(table);
+
+		tmp_heap = mem_heap_create(256);
+		
+		ref = row_build_row_ref(ROW_COPY_POINTERS, index, rec,
+								tmp_heap);
+		btr_pcur_open_with_no_init(clust_index, ref,
+			PAGE_CUR_LE, BTR_SEARCH_LEAF,
+			cascade->pcur, 0, mtr);
+
+		mem_heap_free(tmp_heap);	/* ref was built only for the search */
+
+		clust_rec = btr_pcur_get_rec(cascade->pcur);
+	}
+
+	if (!page_rec_is_user_rec(clust_rec)) {
+	  	fprintf(stderr,
+			"InnoDB: error in cascade of a foreign key op\n"
+		  	"InnoDB: index %s table %s\n", index->name,
+		  	index->table->name);
+
+	  	rec_sprintf(err_buf, 900, rec);
+	  	fprintf(stderr, "InnoDB: record %s\n", err_buf);
+
+	  	rec_sprintf(err_buf, 900, clust_rec);
+	  	fprintf(stderr, "InnoDB: clustered record %s\n", err_buf);
+
+	  	fprintf(stderr,
+			"InnoDB: Make a detailed bug report and send it\n");
+	  	fprintf(stderr, "InnoDB: to mysql@lists.mysql.com\n");
+
+		err = DB_SUCCESS;	/* the anomaly is only printed, not returned */
+
+		goto nonstandard_exit_func;
+	}
+
+	/* Set an X-lock on the row to delete or update in the child table */
+
+	err = lock_table(0, table, LOCK_IX, thr);
+
+	if (err == DB_SUCCESS) {
+		err = lock_clust_rec_read_check_and_lock(0, clust_rec,
+						clust_index, LOCK_X, thr);
+	}
+	
+	if (err != DB_SUCCESS) {
+
+		goto nonstandard_exit_func;
+	}
+
+	if (rec_get_deleted_flag(clust_rec)) {
+		/* This can happen if there is a circular reference of
+		rows such that cascading delete comes to delete a row
+		already in the process of being delete marked */
+/*
+	  	fprintf(stderr,
+			"InnoDB: error 2 in cascade of a foreign key op\n"
+		  	"InnoDB: index %s table %s\n", index->name,
+		  	index->table->name);
+
+	  	rec_sprintf(err_buf, 900, rec);
+	  	fprintf(stderr, "InnoDB: record %s\n", err_buf);
+
+	  	rec_sprintf(err_buf, 900, clust_rec);
+	  	fprintf(stderr, "InnoDB: clustered record %s\n", err_buf);
+
+	  	fprintf(stderr,
+			"InnoDB: Make a detailed bug report and send it\n");
+	  	fprintf(stderr, "InnoDB: to mysql@lists.mysql.com\n");
+
+		ut_a(0);
+*/
+		err = DB_SUCCESS;		
+
+		goto nonstandard_exit_func;
+	}
+
+	if (foreign->type == DICT_FOREIGN_ON_DELETE_SET_NULL) {
+		/* Build the appropriate update vector which sets
+		foreign->n_fields first fields in rec to SQL NULL */
+
+		update = cascade->update;
+
+		update->info_bits = 0;
+		update->n_fields = foreign->n_fields;
+		
+		for (i = 0; i < foreign->n_fields; i++) {
+			(update->fields + i)->field_no
+				= dict_table_get_nth_col_pos(table,
+					dict_index_get_nth_col_no(index, i));
+			(update->fields + i)->exp = NULL;
+			(update->fields + i)->new_val.len = UNIV_SQL_NULL;
+			(update->fields + i)->new_val.data = NULL;
+			(update->fields + i)->extern_storage = FALSE;
+		}
+	}
+
+	/* Store pcur position and initialize or store the cascade node
+	pcur stored position */
+	
+	btr_pcur_store_position(pcur, mtr);
+	
+	if (index == clust_index) {
+		btr_pcur_copy_stored_position(cascade->pcur, pcur);
+	} else {
+		btr_pcur_store_position(cascade->pcur, mtr);
+	}
+		
+	mtr_commit(mtr);
+
+	ut_a(cascade->pcur->rel_pos == BTR_PCUR_ON);
+
+	cascade->state = UPD_NODE_UPDATE_CLUSTERED;
+	
+	err = row_update_cascade_for_mysql(thr, cascade,
+						foreign->foreign_table);
+	mtr_start(mtr);
+
+	/* Restore pcur position */
+	
+	btr_pcur_restore_position(BTR_SEARCH_LEAF, pcur, mtr);
+
+	return(err);
+
+nonstandard_exit_func:	/* reached when no cascade operation was run */
+
+	btr_pcur_store_position(pcur, mtr);
+
+	mtr_commit(mtr);
+	mtr_start(mtr);
+
+	btr_pcur_restore_position(BTR_SEARCH_LEAF, pcur, mtr);
+
+	return(err);
+}
+
+/*************************************************************************
 Sets a shared lock on a record. Used in locking possible duplicate key
 records. */
 static
@@ -391,7 +612,7 @@ row_ins_check_foreign_constraint(
 				/* out: DB_SUCCESS, DB_LOCK_WAIT,
 				DB_NO_REFERENCED_ROW,
 				or DB_ROW_IS_REFERENCED */
-	ibool		check_ref,/* in: TRUE If we want to check that
+	ibool		check_ref,/* in: TRUE if we want to check that
 				the referenced table is ok, FALSE if we
 				want to to check the foreign key table */
 	dict_foreign_t*	foreign,/* in: foreign constraint; NOTE that the
@@ -411,10 +632,30 @@ row_ins_check_foreign_constraint(
 	ibool		moved;
 	int		cmp;
 	ulint		err;
+	ulint		i;
 	mtr_t		mtr;
 
 	ut_ad(rw_lock_own(&dict_foreign_key_check_lock, RW_LOCK_SHARED));
 
+	if (thr_get_trx(thr)->check_foreigns == FALSE) {
+		/* The user has suppressed foreign key checks currently for
+		this session */
+
+		return(DB_SUCCESS);
+	}
+
+	/* If any of the foreign key fields in entry is SQL NULL, we
+	suppress the foreign key check: this is compatible with Oracle,
+	for example */
+
+	for (i = 0; i < foreign->n_fields; i++) {
+		if (UNIV_SQL_NULL == dfield_get_len(
+                                         dtuple_get_nth_field(entry, i))) {
+
+			return(DB_SUCCESS);
+		}
+	}
+
 	if (check_ref) {
 		check_table = foreign->referenced_table;
 		check_index = foreign->referenced_index;
@@ -465,8 +706,8 @@ row_ins_check_foreign_constraint(
 
 			goto next_rec;
 		}
-				
-		/* Try to place a lock on the index record */	
+		
+		/* Try to place a lock on the index record */
 
 		err = row_ins_set_shared_rec_lock(rec, check_index, thr);
 
@@ -488,11 +729,21 @@ row_ins_check_foreign_constraint(
 
 				if (check_ref) {			
 					err = DB_SUCCESS;
+
+					break;
+				} else if (foreign->type != 0) {
+					err =
+					  row_ins_foreign_delete_or_set_null(
+						thr, foreign, &pcur, &mtr);
+
+					if (err != DB_SUCCESS) {
+
+						break;
+					}
 				} else {
 					err = DB_ROW_IS_REFERENCED;
+					break;
 				}
-			
-				break;
 			}
 		}
 
@@ -521,6 +772,8 @@ next_rec:
 		}
 	}
 
+	btr_pcur_close(&pcur);
+
 	mtr_commit(&mtr);
 
 	/* Restore old value */
@@ -548,6 +801,10 @@ row_ins_check_foreign_constraints(
 {
 	dict_foreign_t*	foreign;
 	ulint		err;
+	trx_t*		trx;
+	ibool		got_s_lock	= FALSE;
+
+	trx = thr_get_trx(thr);
 
 	foreign = UT_LIST_GET_FIRST(table->foreign_list);
 
@@ -556,16 +813,26 @@ row_ins_check_foreign_constraints(
 
 			if (foreign->referenced_table == NULL) {
 				dict_table_get(foreign->referenced_table_name,
-						thr_get_trx(thr));
+									trx);
 			}
 
-			rw_lock_s_lock(&dict_foreign_key_check_lock);	
+			if (!trx->has_dict_foreign_key_check_lock) {
+				got_s_lock = TRUE;
+
+				rw_lock_s_lock(&dict_foreign_key_check_lock);
+
+				trx->has_dict_foreign_key_check_lock = TRUE;
+			}
 
 			err = row_ins_check_foreign_constraint(TRUE, foreign,
 						table, index, entry, thr);
+			if (got_s_lock) {
 
-			rw_lock_s_unlock(&dict_foreign_key_check_lock);	
+				rw_lock_s_unlock(&dict_foreign_key_check_lock);	
 
+				trx->has_dict_foreign_key_check_lock = FALSE;
+			}
+				
 			if (err != DB_SUCCESS) {
 				return(err);
 			}
@@ -591,6 +858,8 @@ row_ins_scan_sec_index_for_duplicate(
 	dtuple_t*	entry,	/* in: index entry */
 	que_thr_t*	thr)	/* in: query thread */
 {
+	ulint		n_unique;
+	ulint		i;
 	int		cmp;
 	ulint		n_fields_cmp;
 	rec_t*		rec;
@@ -599,6 +868,20 @@ row_ins_scan_sec_index_for_duplicate(
 	ibool		moved;
 	mtr_t		mtr;
 
+	n_unique = dict_index_get_n_unique(index);
+
+	/* If the secondary index is unique, but one of the fields in the
+	n_unique first fields is NULL, a unique key violation cannot occur,
+	since we define NULL != NULL in this case */
+
+	for (i = 0; i < n_unique; i++) {
+		if (UNIV_SQL_NULL == dfield_get_len(
+                                         dtuple_get_nth_field(entry, i))) {
+
+			return(DB_SUCCESS);
+		}
+	}
+
 	mtr_start(&mtr);
 
 	/* Store old value on n_fields_cmp */
@@ -839,13 +1122,14 @@ row_ins_index_entry_low(
 	ulint		n_ext_vec,/* in: number of fields in ext_vec */
 	que_thr_t*	thr)	/* in: query thread */
 {
-	btr_cur_t	cursor;		
+	btr_cur_t	cursor;
+	ulint		ignore_sec_unique	= 0;
 	ulint		modify = 0; /* remove warning */
 	rec_t*		insert_rec;
 	rec_t*		rec;
 	ulint		err;
 	ulint		n_unique;
-	big_rec_t*	big_rec		= NULL;
+	big_rec_t*	big_rec			= NULL;
 	mtr_t		mtr;
 	
 	log_free_check();
@@ -858,8 +1142,13 @@ row_ins_index_entry_low(
 	the function will return in both low_match and up_match of the
 	cursor sensible values */
 	
+	if (!(thr_get_trx(thr)->check_unique_secondary)) {
+		ignore_sec_unique = BTR_IGNORE_SEC_UNIQUE;
+	}
+
 	btr_cur_search_to_nth_level(index, 0, entry, PAGE_CUR_LE,
-					mode | BTR_INSERT, &cursor, 0, &mtr);
+				mode | BTR_INSERT | ignore_sec_unique,
+				&cursor, 0, &mtr);
 
 	if (cursor.flag == BTR_CUR_INSERT_TO_IBUF) {
 		/* The insertion was made to the insert buffer already during
diff --git a/innobase/row/row0mysql.c b/innobase/row/row0mysql.c
index f5ed0ef65af..d6c8d7ab412 100644
--- a/innobase/row/row0mysql.c
+++ b/innobase/row/row0mysql.c
@@ -28,6 +28,9 @@ Created 9/17/2000 Heikki Tuuri
 #include "rem0cmp.h"
 #include "log0log.h"
 
+/* A dummy variable used to fool the compiler */
+ibool	row_mysql_identically_false	= FALSE;
+
 /* List of tables we should drop in background. ALTER TABLE in MySQL requires
 that the table handler can drop the table in background when there are no
 queries to it any more. Protected by the kernel mutex. */
@@ -63,15 +66,48 @@ row_mysql_store_blob_ref(
 	byte*	dest,		/* in: where to store */
 	ulint	col_len,	/* in: dest buffer size: determines into
 				how many bytes the BLOB length is stored,
-				this may vary from 1 to 4 bytes */
-	byte*	data,		/* in: BLOB data */
-	ulint	len)		/* in: BLOB length */
+				the space for the length may vary from 1
+				to 4 bytes */
+	byte*	data,		/* in: BLOB data; if the value to store
+				is SQL NULL this should be NULL pointer */
+	ulint	len)		/* in: BLOB length; if the value to store
+				is SQL NULL this should be 0; remember
+				also to set the NULL bit in the MySQL record
+				header! */
 {
+	ulint	sum	= 0;
+	ulint	i;
+
+	/* MySQL might assume the field is set to zero except the length and
+	the pointer fields */
+
+	memset(dest, '\0', col_len);
+
 	/* In dest there are 1 - 4 bytes reserved for the BLOB length,
 	and after that 8 bytes reserved for the pointer to the data.
 	In 32-bit architectures we only use the first 4 bytes of the pointer
 	slot. */
 
+	ut_a(col_len - 8 > 1 || len < 256);
+	ut_a(col_len - 8 > 2 || len < 256 * 256);
+	ut_a(col_len - 8 > 3 || len < 256 * 256 * 256);
+
+	/* We try to track an elusive bug which probably was fixed
+	May 9, 2002, but better be sure: we probe the data buffer
+	to make sure it is in valid allocated memory */
+
+	for (i = 0; i < len; i++) {
+
+		sum += (ulint)(data + i);
+	}
+
+	/* The variable below is identically false, we just fool the
+	compiler to not optimize away our loop */
+	if (row_mysql_identically_false) {
+
+		printf("Sum %lu\n", sum);
+	}
+
 	mach_write_to_n_little_endian(dest, col_len - 8, len);
 
 	ut_memcpy(dest + col_len - 8, (byte*)&data, sizeof(byte*));	
@@ -499,29 +535,24 @@ UNIV_INLINE
 void
 row_update_statistics_if_needed(
 /*============================*/
-	row_prebuilt_t*	prebuilt)	/* in: prebuilt struct */
+	dict_table_t*	table)	/* in: table */
 {
 	ulint	counter;
 	
-	counter = prebuilt->table->stat_modified_counter;
-
-	/* Since the physical size of an InnoDB row is bigger than the
-	MySQL row len, we put a safety factor 2 below */
+	counter = table->stat_modified_counter;
 
-	counter += 2 * prebuilt->mysql_row_len;
-
-	prebuilt->table->stat_modified_counter = counter;
+	table->stat_modified_counter = counter + 1;
 
 	/* Calculate new statistics if 1 / 16 of table has been modified
 	since the last time a statistics batch was run, or if
-	stat_modified_counter > 2 000 000 000 (to avoid wrap-around) */
+	stat_modified_counter > 2 000 000 000 (to avoid wrap-around).
+	We calculate statistics at most every 16th round, since we may have
+	a counter table which is very small and updated very often. */
 
 	if (counter > 2000000000
-	    || ((ib_longlong)counter >
-		(UNIV_PAGE_SIZE * prebuilt->table->stat_clustered_index_size)
-		/ 16)) {
+	    || ((ib_longlong)counter > 16 + table->stat_n_rows / 16)) {
 
-		dict_update_statistics(prebuilt->table);
+		dict_update_statistics(table);
 	}	
 }
 		  	
@@ -712,7 +743,7 @@ run_again:
 		prebuilt->table->stat_n_rows--;
 	}	
 
-	row_update_statistics_if_needed(prebuilt);
+	row_update_statistics_if_needed(prebuilt->table);
 	trx->op_info = (char *) "";
 
 	return((int) err);
@@ -746,6 +777,43 @@ row_prebuild_sel_graph(
 }
 
 /*************************************************************************
+Creates a query graph node of 'update' type to be used in the MySQL
+interface. The node and its update vector are allocated from heap. */
+
+upd_node_t*
+row_create_update_node_for_mysql(
+/*=============================*/
+				/* out, own: update node */
+	dict_table_t*	table,	/* in: table to update */
+	mem_heap_t*	heap)	/* in: mem heap from which allocated */
+{
+	upd_node_t*	upd;
+	ulint		n_cols;
+
+	upd = upd_node_create(heap);
+	/* Mark the node as a non-searched update of the MySQL interface */
+	upd->in_mysql_interface = TRUE;
+	upd->is_delete = FALSE;
+	upd->searched_update = FALSE;
+	upd->select_will_do_update = FALSE;
+	upd->select = NULL;
+	upd->table_sym = NULL;
+	upd->col_assign_list = NULL;
+	upd->pcur = btr_pcur_create_for_mysql();
+	upd->table = table;
+
+	/* Reserve room in the update vector for every column of the table */
+	n_cols = dict_table_get_n_cols(table);
+	upd->update = upd_create(n_cols, heap);
+	upd->update_n_fields = n_cols;
+
+	UT_LIST_INIT(upd->columns);
+	upd->has_clust_rec_x_lock = TRUE;
+	upd->cmpl_info = 0;
+	return(upd);
+}
+
+/*************************************************************************
 Gets pointer to a prebuilt update vector used in updates. If the update
 graph has not yet been built in the prebuilt struct, then this function
 first builds it. */
@@ -767,26 +835,9 @@ row_get_prebuilt_update_vector(
 		/* Not called before for this handle: create an update node
 		and query graph to the prebuilt struct */
 
-		node = upd_node_create(prebuilt->heap);
-		
-		prebuilt->upd_node = node;
+		node = row_create_update_node_for_mysql(table, prebuilt->heap);
 
-		node->in_mysql_interface = TRUE;
-		node->is_delete = FALSE;
-		node->searched_update = FALSE;
-		node->select_will_do_update = FALSE;
-		node->select = NULL;
-		node->pcur = btr_pcur_create_for_mysql();
-		node->table = table;
-
-		node->update = upd_create(dict_table_get_n_cols(table),
-							prebuilt->heap);
-		UT_LIST_INIT(node->columns);
-		node->has_clust_rec_x_lock = TRUE;
-		node->cmpl_info = 0;
-
-		node->table_sym = NULL;
-		node->col_assign_list = NULL;
+		prebuilt->upd_node = node;
 		
 		prebuilt->upd_graph =
 			que_node_get_parent(
@@ -914,7 +965,7 @@ run_again:
 
 	que_thr_stop_for_mysql_no_error(thr, trx);
 
-	if (prebuilt->upd_node->is_delete) {
+	if (node->is_delete) {
 		if (prebuilt->table->stat_n_rows > 0) {
 			prebuilt->table->stat_n_rows--;
 		}
@@ -924,13 +975,66 @@ run_again:
 		srv_n_rows_updated++;
 	}
 
-	row_update_statistics_if_needed(prebuilt);
+	row_update_statistics_if_needed(prebuilt->table);
 
 	trx->op_info = (char *) "";
 
 	return((int) err);
 }
 
+/**************************************************************************
+Does a cascaded delete or set null in a foreign key operation. Runs the
+update node, retrying it while the error handler asks for a retry. */
+
+ulint
+row_update_cascade_for_mysql(
+/*=========================*/
+				/* out: error code or DB_SUCCESS */
+	que_thr_t*	thr,	/* in: query thread */
+	upd_node_t*	node,	/* in: update node used in the cascade
+				or set null operation */
+	dict_table_t*	table)	/* in: table where we do the operation */
+{
+	ulint		err;
+	trx_t*		trx;
+
+	trx = thr_get_trx(thr);
+
+run_again:
+	thr->run_node = node;
+	thr->prev_node = node;
+
+	row_upd_step(thr);
+
+	err = trx->error_state;
+
+	if (err == DB_LOCK_WAIT) {
+		que_thr_stop_for_mysql(thr);
+		/* Run the node again only if the error handler tells us to;
+		otherwise pass on the error it left in err instead of
+		repeating the operation unconditionally */
+		if (row_mysql_handle_errors(&err, trx, thr, NULL)) {
+			goto run_again;
+		}
+		return(err);
+	}
+	if (err != DB_SUCCESS) {
+		return(err);
+	}
+
+	/* The operation succeeded: keep the row statistics up to date */
+	if (node->is_delete) {
+		if (table->stat_n_rows > 0) {
+			table->stat_n_rows--;
+		}
+		srv_n_rows_deleted++;
+	} else {
+		srv_n_rows_updated++;
+	}
+	row_update_statistics_if_needed(table);
+	return(err);
+}
+
 /*************************************************************************
 Checks if a table is such that we automatically created a clustered
 index on it (on row id). */
@@ -1171,6 +1275,7 @@ row_create_table_for_mysql(
 	/* Serialize data dictionary operations with dictionary mutex:
 	no deadlocks can occur then in these operations */
 
+	rw_lock_x_lock(&(dict_foreign_key_check_lock));
 	mutex_enter(&(dict_sys->mutex));
 
 	heap = mem_heap_create(512);
@@ -1223,6 +1328,8 @@ row_create_table_for_mysql(
 	}
 
 	mutex_exit(&(dict_sys->mutex));
+	rw_lock_x_unlock(&(dict_foreign_key_check_lock));
+
 	que_graph_free((que_t*) que_node_get_parent(thr));
 
 	trx->op_info = (char *) "";
@@ -1270,6 +1377,7 @@ row_create_index_for_mysql(
 	/* Serialize data dictionary operations with dictionary mutex:
 	no deadlocks can occur then in these operations */
 
+	rw_lock_x_lock(&(dict_foreign_key_check_lock));
 	mutex_enter(&(dict_sys->mutex));
 
 	heap = mem_heap_create(512);
@@ -1300,6 +1408,7 @@ row_create_index_for_mysql(
 	}
 
 	mutex_exit(&(dict_sys->mutex));
+	rw_lock_x_unlock(&(dict_foreign_key_check_lock));
 
 	que_graph_free((que_t*) que_node_get_parent(thr));
 	
@@ -1355,6 +1464,7 @@ row_table_add_foreign_constraints(
 	/* Serialize data dictionary operations with dictionary mutex:
 	no deadlocks can occur then in these operations */
 
+	rw_lock_x_lock(&(dict_foreign_key_check_lock));
 	mutex_enter(&(dict_sys->mutex));
 
 	trx->dict_operation = TRUE;
@@ -1379,6 +1489,7 @@ row_table_add_foreign_constraints(
 	}
 
 	mutex_exit(&(dict_sys->mutex));
+	rw_lock_x_unlock(&(dict_foreign_key_check_lock));
 
 	return((int) err);
 }
@@ -1473,7 +1584,8 @@ loop:
 	        goto already_dropped;
 	}
 
-	if (table->n_mysql_handles_opened > 0) {
+	if (table->n_mysql_handles_opened > 0
+				|| table->n_foreign_key_checks_running > 0) {
 
 		return(n_tables + n_tables_dropped);
 	}
@@ -1722,6 +1834,9 @@ row_drop_table_for_mysql(
 	no deadlocks can occur then in these operations */
 
 	if (!has_dict_mutex) {
+		/* Prevent foreign key checks while we are dropping the table */
+		rw_lock_x_lock(&(dict_foreign_key_check_lock));
+
 		mutex_enter(&(dict_sys->mutex));
 	}
 
@@ -1734,9 +1849,6 @@ row_drop_table_for_mysql(
 
 	graph->fork_type = QUE_FORK_MYSQL_INTERFACE;
 
-	/* Prevent foreign key checks while we are dropping the table */
-	rw_lock_x_lock(&(dict_foreign_key_check_lock));
-
 	/* Prevent purge from running while we are dropping the table */
 	rw_lock_s_lock(&(purge_sys->purge_is_running));
 
@@ -1771,6 +1883,22 @@ row_drop_table_for_mysql(
 		goto funct_exit;
 	}
 
+	if (table->n_foreign_key_checks_running > 0) {
+		
+	        ut_print_timestamp(stderr);
+	        fprintf(stderr,
+		  "  InnoDB: You are trying to drop table %s\n"
+		  "InnoDB: though there are foreign key check running on it.\n"
+		  "InnoDB: Adding the table to the background drop queue.\n",
+		  table->name);
+
+		row_add_table_to_background_drop_list(table);
+
+		err = DB_SUCCESS;
+
+		goto funct_exit;
+	}
+	
 	/* Remove any locks there are on the table or its records */
 	
 	lock_reset_all_on_table(table);
@@ -1798,10 +1926,9 @@ row_drop_table_for_mysql(
 funct_exit:	
 	rw_lock_s_unlock(&(purge_sys->purge_is_running));
 
-	rw_lock_x_unlock(&(dict_foreign_key_check_lock));
-
 	if (!has_dict_mutex) {
 		mutex_exit(&(dict_sys->mutex));
+		rw_lock_x_unlock(&(dict_foreign_key_check_lock));
 	}
 
 	que_graph_free(graph);
@@ -1837,6 +1964,7 @@ row_drop_database_for_mysql(
 	
 	trx_start_if_not_started(trx);
 loop:
+	rw_lock_x_lock(&(dict_foreign_key_check_lock));
 	mutex_enter(&(dict_sys->mutex));
 
 	while ((table_name = dict_get_first_table_name_in_db(name))) {
@@ -1878,6 +2006,7 @@ loop:
 	}
 
 	mutex_exit(&(dict_sys->mutex));
+	rw_lock_x_unlock(&(dict_foreign_key_check_lock));
 	
 	trx_commit_for_mysql(trx);
 
@@ -1887,6 +2016,28 @@ loop:
 }
 
 /*************************************************************************
+Checks if a table name contains the string "/#sql" which denotes temporary
+tables in MySQL. Names shorter than the pattern are never temporary. */
+static
+ibool
+row_is_mysql_tmp_table_name(
+/*========================*/
+			/* out: TRUE if temporary table */
+	char*	name)	/* in: table name in the form 'database/tablename' */
+{
+	ulint	len	= ut_strlen(name);
+	ulint	i;
+
+	/* i + 5 <= len cannot wrap around, unlike len - 5 when len < 5 */
+	for (i = 0; i + 5 <= len; i++) {
+		if (ut_memcmp(name + i, "/#sql", 5) == 0) {
+			return(TRUE);
+		}
+	}
+	return(FALSE);
+}
+
+/*************************************************************************
 Renames a table for MySQL. */
 
 int
@@ -1949,16 +2100,27 @@ row_rename_table_for_mysql(
 	str2 = (char *) 
 	"';\nold_table_name := '";
 
-	str3 = (char *) 
-	"';\n"
-	"UPDATE SYS_TABLES SET NAME = new_table_name\n"
-	"WHERE NAME = old_table_name;\n"
-	"UPDATE SYS_FOREIGN SET FOR_NAME = new_table_name\n"
-	"WHERE FOR_NAME = old_table_name;\n"
-	"UPDATE SYS_FOREIGN SET REF_NAME = new_table_name\n"
-	"WHERE REF_NAME = old_table_name;\n"
-	"COMMIT WORK;\n"
-	"END;\n";
+	if (row_is_mysql_tmp_table_name(new_name)) {
+
+		/* We want to preserve the original foreign key
+		constraint definitions despite the name change */
+
+		str3 = (char*)
+		"';\n"
+		"UPDATE SYS_TABLES SET NAME = new_table_name\n"
+		"WHERE NAME = old_table_name;\n"
+		"END;\n";
+	} else {
+		str3 = (char*)
+		"';\n"
+		"UPDATE SYS_TABLES SET NAME = new_table_name\n"
+		"WHERE NAME = old_table_name;\n"
+		"UPDATE SYS_FOREIGN SET FOR_NAME = new_table_name\n"
+		"WHERE FOR_NAME = old_table_name;\n"
+		"UPDATE SYS_FOREIGN SET REF_NAME = new_table_name\n"
+		"WHERE REF_NAME = old_table_name;\n"
+		"END;\n";
+	}
 
 	len = ut_strlen(str1);
 
@@ -1981,6 +2143,7 @@ row_rename_table_for_mysql(
 	/* Serialize data dictionary operations with dictionary mutex:
 	no deadlocks can occur then in these operations */
 
+	rw_lock_x_lock(&(dict_foreign_key_check_lock));
 	mutex_enter(&(dict_sys->mutex));
 
 	table = dict_table_get_low(old_name);
@@ -2033,10 +2196,36 @@ row_rename_table_for_mysql(
 		trx_general_rollback_for_mysql(trx, FALSE, NULL);
 		trx->error_state = DB_SUCCESS;
 	} else {
-		ut_a(dict_table_rename_in_cache(table, new_name));
+		ut_a(dict_table_rename_in_cache(table, new_name,
+				!row_is_mysql_tmp_table_name(new_name)));
+
+		if (row_is_mysql_tmp_table_name(old_name)) {
+
+			err = dict_load_foreigns(new_name);
+
+			if (err != DB_SUCCESS) {
+
+	    			ut_print_timestamp(stderr);
+
+				fprintf(stderr,
+     "  InnoDB: Error: in ALTER TABLE table %s\n"
+     "InnoDB: has or is referenced in foreign key constraints\n"
+     "InnoDB: which are not compatible with the new table definition.\n",
+     new_name);
+     
+				ut_a(dict_table_rename_in_cache(table,
+							old_name, FALSE));
+						
+				trx->error_state = DB_SUCCESS;
+				trx_general_rollback_for_mysql(trx, FALSE,
+									NULL);
+				trx->error_state = DB_SUCCESS;
+			}
+		}
 	}
 funct_exit:	
 	mutex_exit(&(dict_sys->mutex));
+	rw_lock_x_unlock(&(dict_foreign_key_check_lock));
 
 	que_graph_free(graph);
 	
diff --git a/innobase/row/row0sel.c b/innobase/row/row0sel.c
index abae7f373bf..f3dced15fdf 100644
--- a/innobase/row/row0sel.c
+++ b/innobase/row/row0sel.c
@@ -2071,13 +2071,11 @@ row_sel_store_mysql_rec(
 		data = rec_get_nth_field(rec, templ->rec_field_no, &len);
 
 		if (rec_get_nth_field_extern_bit(rec, templ->rec_field_no)) {
+
 			/* Copy an externally stored field to the temporary
 			heap */
 
-			if (prebuilt->trx->has_search_latch) {
-				rw_lock_s_unlock(&btr_search_latch);
-				prebuilt->trx->has_search_latch = FALSE;
-			}
+			ut_a(!prebuilt->trx->has_search_latch);
 
 			extern_field_heap = mem_heap_create(UNIV_PAGE_SIZE);
 
@@ -2091,6 +2089,8 @@ row_sel_store_mysql_rec(
 		if (len != UNIV_SQL_NULL) {
 			if (templ->type == DATA_BLOB) {
 
+				ut_a(prebuilt->templ_contains_blob);
+				
 				/* Copy the BLOB data to the BLOB
 				heap of prebuilt */
 
@@ -2116,8 +2116,28 @@ row_sel_store_mysql_rec(
 				extern_field_heap = NULL;
  			}
 		} else {
-			mysql_rec[templ->mysql_null_byte_offset] |=
+			/* MySQL sometimes seems to copy the 'data'
+			pointed to by a BLOB field even if the field
+			has been marked to contain the SQL NULL value.
+			This caused seg faults reported by two users.
+			Set the BLOB length to 0 and the data pointer
+			to NULL to avoid a seg fault. */
+
+			if (templ->type == DATA_BLOB) {
+				row_sel_field_store_in_mysql_format(
+				mysql_rec + templ->mysql_col_offset,
+				templ->mysql_col_len, NULL,
+				0, templ->type, templ->is_unsigned);
+			}
+
+			if (!templ->mysql_null_bit_mask) {
+				fprintf(stderr,
+"InnoDB: Error: trying to return an SQL NULL field in a non-null\n"
+"innoDB: column! Table name %s\n", prebuilt->table->name);
+			} else {
+				mysql_rec[templ->mysql_null_byte_offset] |=
 					(byte) (templ->mysql_null_bit_mask);
+			}
 		}
 	} 
 }
@@ -2234,7 +2254,7 @@ row_sel_get_clust_rec_for_mysql(
 		(or old_vers) is not rec; in that case we must ignore
 		such row because in our snapshot rec would not have existed.
 		Remember that from rec we cannot see directly which transaction
-		id corrsponds to it: we have to go to the clustered index
+		id corresponds to it: we have to go to the clustered index
 		record. A query where we want to fetch all rows where
 		the secondary index value is in some interval would return
 		a wrong result if we would not drop rows which we come to
@@ -2245,6 +2265,12 @@ row_sel_get_clust_rec_for_mysql(
 		    && !row_sel_sec_rec_is_for_clust_rec(rec, sec_index,
 						clust_rec, clust_index)) {
 			clust_rec = NULL;
+		} else {
+#ifdef UNIV_SEARCH_DEBUG
+			ut_a(clust_rec == NULL ||
+			    row_sel_sec_rec_is_for_clust_rec(rec, sec_index,
+						clust_rec, clust_index));
+#endif		
 		}
 	}
 
@@ -2357,6 +2383,7 @@ row_sel_push_cache_row_for_mysql(
 	ulint	i;
 
 	ut_ad(prebuilt->n_fetch_cached < MYSQL_FETCH_CACHE_SIZE);
+	ut_a(!prebuilt->templ_contains_blob);
 
 	if (prebuilt->fetch_cache[0] == NULL) {
 		/* Allocate memory for the fetch cache */
@@ -2397,10 +2424,16 @@ row_sel_try_search_shortcut_for_mysql(
 	rec_t*		rec;
 	
 	ut_ad(index->type & DICT_CLUSTERED);
+	ut_ad(!prebuilt->templ_contains_blob);
 
 	btr_pcur_open_with_no_init(index, search_tuple, PAGE_CUR_GE,
 					BTR_SEARCH_LEAF, pcur,
-					RW_S_LATCH, mtr);
+#ifndef UNIV_SEARCH_DEBUG
+					RW_S_LATCH,
+#else
+					0,
+#endif
+					mtr);
 	rec = btr_pcur_get_rec(pcur);
 	
 	if (!page_rec_is_user_rec(rec)) {
@@ -2574,8 +2607,16 @@ row_search_for_mysql(
 
 	mtr_start(&mtr);
 
-	if (match_mode == ROW_SEL_EXACT && index->type & DICT_UNIQUE
+	/* Since we must release the search system latch when we retrieve an
+	externally stored field, we cannot use the adaptive hash index in a
+	search in the case the row may be long and there may be externally
+	stored fields */
+	
+	if (match_mode == ROW_SEL_EXACT
+		&& index->type & DICT_UNIQUE
 		&& index->type & DICT_CLUSTERED
+		&& !prebuilt->templ_contains_blob
+		&& (prebuilt->mysql_row_len < UNIV_PAGE_SIZE / 8)
 		&& dtuple_get_n_fields(search_tuple)
 				== dict_index_get_n_unique(index)) {
 
@@ -2624,15 +2665,18 @@ row_search_for_mysql(
 				
 				goto no_shortcut;
 			}
-			
+#ifndef UNIV_SEARCH_DEBUG			
 			if (!trx->has_search_latch) {
 				rw_lock_s_lock(&btr_search_latch);
 				trx->has_search_latch = TRUE;
 			}
-
+#endif
 			shortcut = row_sel_try_search_shortcut_for_mysql(&rec,
 							       prebuilt, &mtr);
 			if (shortcut == SEL_FOUND) {
+#ifdef UNIV_SEARCH_DEBUG
+				ut_a(0 == cmp_dtuple_rec(search_tuple, rec));
+#endif 
 				row_sel_store_mysql_rec(buf, prebuilt, rec);
 	
  				mtr_commit(&mtr);
@@ -2794,7 +2838,9 @@ rec_loop:
 		/* The record matches enough */
 
 		ut_ad(mode == PAGE_CUR_GE);
-	
+#ifdef UNIV_SEARCH_DEBUG
+		ut_a(0 == cmp_dtuple_rec(search_tuple, rec));
+#endif	
 	} else if (match_mode == ROW_SEL_EXACT) {
 		/* Test if the index record matches completely to search_tuple
 		in prebuilt: if not, then we return with DB_RECORD_NOT_FOUND */
@@ -2923,15 +2969,18 @@ rec_loop:
 	/* We found a qualifying row */
 	
 	if (prebuilt->n_rows_fetched >= MYSQL_FETCH_CACHE_THRESHOLD
-			&& !prebuilt->templ_contains_blob
 			&& prebuilt->select_lock_type == LOCK_NONE
+			&& !prebuilt->templ_contains_blob
 			&& !prebuilt->clust_index_was_generated
 	                && prebuilt->template_type
 	                                 != ROW_MYSQL_DUMMY_TEMPLATE) {
 
 		/* Inside an update, for example, we do not cache rows,
 		since we may use the cursor position to do the actual
-		update, that is why we require ...lock_type == LOCK_NONE */
+		update, that is why we require ...lock_type == LOCK_NONE.
+		Since we keep space in prebuilt only for the BLOBs of
+		a single row, we cannot cache rows in the case there
+		are BLOBs in the fields to be fetched. */
 
 		row_sel_push_cache_row_for_mysql(prebuilt, rec);
 
diff --git a/innobase/row/row0upd.c b/innobase/row/row0upd.c
index 31d58dd04a2..1d5319a182b 100644
--- a/innobase/row/row0upd.c
+++ b/innobase/row/row0upd.c
@@ -73,8 +73,7 @@ steps of query graph execution. */
 
 /*************************************************************************
 Checks if index currently is mentioned as a referenced index in a foreign
-key constraint. This function also loads into the dictionary cache the
-possible referencing table. */
+key constraint. */
 static
 ibool
 row_upd_index_is_referenced(
@@ -85,44 +84,28 @@ row_upd_index_is_referenced(
 				the referencing table has been dropped when
 				we leave this function: this function is only
 				for heuristic use! */
-	dict_index_t*	index)	/* in: index */
+	dict_index_t*	index,	/* in: index */
+	trx_t*		trx)	/* in: transaction */
 {
-	dict_table_t*	table	= index->table;
+	dict_table_t*	table		= index->table;
 	dict_foreign_t*	foreign;
-	ulint		phase	= 1;
 
-try_again:	
 	if (!UT_LIST_GET_FIRST(table->referenced_list)) {
 
 		return(FALSE);
 	}
 
-	if (phase == 2) {
-		mutex_enter(&(dict_sys->mutex));
+	if (!trx->has_dict_foreign_key_check_lock) {
+		rw_lock_s_lock(&dict_foreign_key_check_lock);
 	}
 
-	rw_lock_s_lock(&dict_foreign_key_check_lock);
-
 	foreign = UT_LIST_GET_FIRST(table->referenced_list);
 
 	while (foreign) {
 		if (foreign->referenced_index == index) {
-			if (foreign->foreign_table == NULL) {
-				if (phase == 2) {
-					dict_table_get_low(foreign->
-							foreign_table_name);
-				} else {
-					phase = 2;
-					rw_lock_s_unlock(
-						&dict_foreign_key_check_lock);
-					goto try_again;
-				}
-			}
-
-			rw_lock_s_unlock(&dict_foreign_key_check_lock);
 
-			if (phase == 2) {
-				mutex_exit(&(dict_sys->mutex));
+			if (!trx->has_dict_foreign_key_check_lock) {
+				rw_lock_s_unlock(&dict_foreign_key_check_lock);
 			}
 
 			return(TRUE);
@@ -131,10 +114,8 @@ try_again:
 		foreign = UT_LIST_GET_NEXT(referenced_list, foreign);
 	}
 	
-	rw_lock_s_unlock(&dict_foreign_key_check_lock);
-
-	if (phase == 2) {
-		mutex_exit(&(dict_sys->mutex));
+	if (!trx->has_dict_foreign_key_check_lock) {
+		rw_lock_s_unlock(&dict_foreign_key_check_lock);
 	}
 
 	return(FALSE);
@@ -142,7 +123,7 @@ try_again:
 
 /*************************************************************************
 Checks if possible foreign key constraints hold after a delete of the record
-under pcur. NOTE that this function will temporarily commit mtr and lose
+under pcur. NOTE that this function will temporarily commit mtr and lose the
 pcur position! */
 static
 ulint
@@ -160,8 +141,17 @@ row_upd_check_references_constraints(
 	dict_foreign_t*	foreign;
 	mem_heap_t*	heap;
 	dtuple_t*	entry;
+	trx_t*		trx;
 	rec_t*		rec;
 	ulint		err;
+	ibool		got_s_lock	= FALSE;
+
+	if (UT_LIST_GET_FIRST(table->referenced_list) == NULL) {
+
+		return(DB_SUCCESS);
+	}
+
+	trx = thr_get_trx(thr);
 
 	rec = btr_pcur_get_rec(pcur);
 
@@ -173,17 +163,61 @@ row_upd_check_references_constraints(
 
 	mtr_start(mtr);	
 	
-	rw_lock_s_lock(&dict_foreign_key_check_lock);	
+	if (!trx->has_dict_foreign_key_check_lock) {
+		got_s_lock = TRUE;
 
+		rw_lock_s_lock(&dict_foreign_key_check_lock);
+
+		trx->has_dict_foreign_key_check_lock = TRUE;
+	}
+		
 	foreign = UT_LIST_GET_FIRST(table->referenced_list);
 
 	while (foreign) {
 		if (foreign->referenced_index == index) {
+			if (foreign->foreign_table == NULL) {
+				dict_table_get(foreign->foreign_table_name,
+									trx);
+			}
 
+			if (foreign->foreign_table) {
+				mutex_enter(&(dict_sys->mutex));
+
+				(foreign->foreign_table
+				->n_foreign_key_checks_running)++;
+
+				mutex_exit(&(dict_sys->mutex));
+			}
+
+			/* NOTE that if the thread ends up waiting for a lock
+			we will release dict_foreign_key_check_lock
+			temporarily! But the counter on the table
+			protects 'foreign' from being dropped while the check
+			is running. */
+			
 			err = row_ins_check_foreign_constraint(FALSE, foreign,
 						table, index, entry, thr);
+
+			if (foreign->foreign_table) {
+				mutex_enter(&(dict_sys->mutex));
+
+				ut_a(foreign->foreign_table
+				->n_foreign_key_checks_running > 0);
+
+				(foreign->foreign_table
+				->n_foreign_key_checks_running)--;
+
+				mutex_exit(&(dict_sys->mutex));
+			}
+
 			if (err != DB_SUCCESS) {
-				rw_lock_s_unlock(&dict_foreign_key_check_lock);	
+				if (got_s_lock) {
+					rw_lock_s_unlock(
+						&dict_foreign_key_check_lock);	
+					trx->has_dict_foreign_key_check_lock
+								= FALSE;
+				}
+
 				mem_heap_free(heap);
 
 				return(err);
@@ -193,7 +227,11 @@ row_upd_check_references_constraints(
 		foreign = UT_LIST_GET_NEXT(referenced_list, foreign);
 	}
 
-	rw_lock_s_unlock(&dict_foreign_key_check_lock);	
+	if (got_s_lock) {
+		rw_lock_s_unlock(&dict_foreign_key_check_lock);	
+		trx->has_dict_foreign_key_check_lock = FALSE;
+	}
+
 	mem_heap_free(heap);
 	
 	return(DB_SUCCESS);
@@ -222,6 +260,9 @@ upd_node_create(
 	node->index = NULL;
 	node->update = NULL;
 	
+	node->cascade_heap = NULL;
+	node->cascade_node = NULL;
+	
 	node->select = NULL;
 	
 	node->heap = mem_heap_create(128);
@@ -1027,7 +1068,7 @@ row_upd_sec_index_entry(
 	
 	index = node->index;
 	
-	check_ref = row_upd_index_is_referenced(index);
+	check_ref = row_upd_index_is_referenced(index, thr_get_trx(thr));
 
 	heap = mem_heap_create(1024);
 
@@ -1391,7 +1432,7 @@ row_upd_clust_step(
 	
 	index = dict_table_get_first_index(node->table);
 
-	check_ref = row_upd_index_is_referenced(index);
+	check_ref = row_upd_index_is_referenced(index, thr_get_trx(thr));
 
 	pcur = node->pcur;
 
diff --git a/innobase/srv/srv0srv.c b/innobase/srv/srv0srv.c
index c3d3ed53058..f366ce0d160 100644
--- a/innobase/srv/srv0srv.c
+++ b/innobase/srv/srv0srv.c
@@ -69,13 +69,19 @@ char*	srv_main_thread_op_info = (char *) "";
 names, where the file name itself may also contain a path */
 
 char*	srv_data_home 	= NULL;
-char*	srv_logs_home 	= NULL;
 char*	srv_arch_dir 	= NULL;
 
 ulint	srv_n_data_files = 0;
 char**	srv_data_file_names = NULL;
 ulint*	srv_data_file_sizes = NULL;	/* size in database pages */ 
 
+ibool	srv_auto_extend_last_data_file	= FALSE; /* if TRUE, then we
+						 auto-extend the last data
+						 file */
+ulint	srv_last_file_size_max	= 0;		 /* if != 0, this tells
+						 the max size auto-extending
+						 may increase the last data
+						 file size */
 ulint*  srv_data_file_is_raw_partition = NULL;
 
 /* If the following is TRUE we do not allow inserts etc. This protects
@@ -1605,7 +1611,7 @@ srv_read_initfile(
 
 /*************************************************************************
 Initializes the server. */
-static
+
 void
 srv_init(void)
 /*==========*/
@@ -1673,7 +1679,7 @@ srv_init(void)
 /*************************************************************************
 Initializes the synchronization primitives, memory system, and the thread
 local storage. */
-static
+
 void
 srv_general_init(void)
 /*==================*/
@@ -1695,6 +1701,7 @@ srv_conc_enter_innodb(
 	trx_t*	trx)	/* in: transaction object associated with the
 			thread */
 {
+	ibool			has_slept	= FALSE;
 	srv_conc_slot_t*	slot;
 	ulint			i;
 
@@ -1712,7 +1719,7 @@ srv_conc_enter_innodb(
 
 		return;
 	}
-
+retry:
 	os_fast_mutex_lock(&srv_conc_mutex);
 
 	if (srv_conc_n_threads < (lint)srv_thread_concurrency) {
@@ -1725,7 +1732,23 @@ srv_conc_enter_innodb(
 
 		return;
 	}
+
+	/* If the transaction is not holding resources, let it sleep
+	for 100 milliseconds, and try again then */
 	
+	if (!has_slept && !trx->has_search_latch
+	    && NULL == UT_LIST_GET_FIRST(trx->trx_locks)) {
+
+	    	has_slept = TRUE; /* We let it sleep only once to avoid
+	    			  starvation */
+
+	    	os_fast_mutex_unlock(&srv_conc_mutex);
+
+	    	os_thread_sleep(100000);
+
+		goto retry;
+	}	    	
+
 	/* Too many threads inside: put the current thread to a queue */
 
 	for (i = 0; i < OS_THREAD_MAX_N; i++) {
@@ -1917,6 +1940,9 @@ srv_normalize_init_values(void)
 					* ((1024 * 1024) / UNIV_PAGE_SIZE);
 	}		
 
+	srv_last_file_size_max = srv_last_file_size_max
+					* ((1024 * 1024) / UNIV_PAGE_SIZE);
+		
 	srv_log_file_size = srv_log_file_size / UNIV_PAGE_SIZE;
 
 	srv_log_buffer_size = srv_log_buffer_size / UNIV_PAGE_SIZE;
@@ -2000,15 +2026,18 @@ srv_suspend_mysql_thread(
 /*=====================*/
 				/* out: TRUE if the lock wait timeout was
 				exceeded */
-	que_thr_t*	thr)	/* in: query thread associated with
-				the MySQL OS thread */
+	que_thr_t*	thr)	/* in: query thread associated with the MySQL
+				OS thread */
 {
 	srv_slot_t*	slot;
 	os_event_t	event;
 	double		wait_time;
-
+	trx_t*		trx;
+	
 	ut_ad(!mutex_own(&kernel_mutex));
 
+	trx = thr_get_trx(thr);
+	
 	os_event_set(srv_lock_timeout_thread_event);
 
 	mutex_enter(&kernel_mutex);
@@ -2044,10 +2073,21 @@ srv_suspend_mysql_thread(
 	
 	srv_conc_force_exit_innodb(thr_get_trx(thr));
 
+	/* Release possible foreign key check latch */
+	if (trx->has_dict_foreign_key_check_lock) {
+
+		rw_lock_s_unlock(&dict_foreign_key_check_lock);
+	}
+
 	/* Wait for the release */
 	
 	os_event_wait(event);
 
+	if (trx->has_dict_foreign_key_check_lock) {
+
+		rw_lock_s_lock(&dict_foreign_key_check_lock);
+	}
+
 	/* Return back inside InnoDB */
 	
 	srv_conc_force_enter_innodb(thr_get_trx(thr));
diff --git a/innobase/srv/srv0start.c b/innobase/srv/srv0start.c
index bbb48331811..fa4f4bc9419 100644
--- a/innobase/srv/srv0start.c
+++ b/innobase/srv/srv0start.c
@@ -84,6 +84,308 @@ we may get an assertion failure in os0file.c */
 
 #define SRV_LOG_SPACE_FIRST_ID		1000000000
 
+/*************************************************************************
+Reads the data file paths, sizes and flags from a character string given in
+the .cnf file. When storing, the ':' ending each path becomes a '\0'. */
+
+ibool
+srv_parse_data_file_paths_and_sizes(
+/*================================*/
+					/* out: TRUE if ok, FALSE if parsing
+					error */
+	char*	str,			/* in: the data file path string */
+	char***	data_file_names,	/* out, own: array of data file
+					names; they point into str */
+	ulint**	data_file_sizes,	/* out, own: array of data file sizes
+					in megabytes */
+	ulint**	data_file_is_raw_partition,/* out, own: array of flags
+					showing which data files are raw
+					partitions */
+	ulint*	n_data_files,		/* out: number of data files */
+	ibool*	is_auto_extending,	/* out: TRUE if the last data file is
+					auto-extending */
+	ulint*	max_auto_extend_size)	/* out: max auto extend size for the
+					last file if specified, 0 if not */
+{
+	char*	input_str;
+	char*	endp;
+	char*	path;
+	ulint	size;
+	ulint	i	= 0;
+
+	*is_auto_extending = FALSE;
+	*max_auto_extend_size = 0;
+
+	input_str = str;
+	
+	/* First calculate the number of data files and check syntax:
+	path:size[M | G];path:size[M | G]... . Note that a Windows path may
+	contain a drive name and a ':'. */
+
+	while (*str != '\0') {
+		path = str;
+
+		while ((*str != ':' && *str != '\0')
+		       || (*str == ':'
+			   && (*(str + 1) == '\\' || *(str + 1) == '/'))) {
+			str++;
+		}
+
+		if (*str == '\0') {
+			return(FALSE);
+		}
+
+		str++;
+		/* size: plain number = bytes, M = megabytes, G = gigabytes */
+		size = strtoul(str, &endp, 10);
+
+		str = endp;
+
+		if (*str != 'M' && *str != 'G') {
+			size = size / (1024 * 1024);
+		} else if (*str == 'G') {
+		        size = size * 1024;
+			str++;
+		} else {
+		        str++;
+		}
+
+	        if (strlen(str) >= ut_strlen(":autoextend")
+	            && 0 == ut_memcmp(str, ":autoextend",
+						ut_strlen(":autoextend"))) {
+
+			str += ut_strlen(":autoextend");
+
+	        	if (strlen(str) >= ut_strlen(":max:")
+	            		&& 0 == ut_memcmp(str, ":max:",
+						ut_strlen(":max:"))) {
+
+				str += ut_strlen(":max:");
+
+				size = strtoul(str, &endp, 10);
+
+				str = endp;
+
+				if (*str != 'M' && *str != 'G') {
+					size = size / (1024 * 1024);
+				} else if (*str == 'G') {
+		        		size = size * 1024;
+					str++;
+				} else {
+		        		str++;
+				}
+			}
+
+			if (*str != '\0') {
+
+				return(FALSE);
+			}
+		}
+		/* Optional suffix: "raw", or "newraw" ("new" before "raw") */
+	        if (strlen(str) >= 6
+			   && *str == 'n'
+			   && *(str + 1) == 'e' 
+		           && *(str + 2) == 'w') {
+		  	str += 3;
+		}
+
+	        if (strlen(str) >= 3
+			   && *str == 'r'
+			   && *(str + 1) == 'a' 
+		           && *(str + 2) == 'w') {
+		  	str += 3;
+		}
+		/* NOTE(review): if ":max:" was parsed, size is the max here */
+		if (size == 0) {
+			return(FALSE);
+		}
+
+		i++;
+
+		if (*str == ';') {
+			str++;
+		} else if (*str != '\0') {
+
+			return(FALSE);
+		}
+	}
+
+	*data_file_names = (char**)ut_malloc(i * sizeof(void*));
+	*data_file_sizes = (ulint*)ut_malloc(i * sizeof(ulint));
+	*data_file_is_raw_partition = (ulint*)ut_malloc(i * sizeof(ulint));
+
+	*n_data_files = i;
+
+	/* Then store the actual values to our arrays */
+
+	str = input_str;
+	i = 0;
+
+	while (*str != '\0') {
+		path = str;
+
+		/* Note that we must ignore the ':' in a Windows path */
+
+		while ((*str != ':' && *str != '\0')
+		       || (*str == ':'
+			   && (*(str + 1) == '\\' || *(str + 1) == '/'))) {
+			str++;
+		}
+
+		if (*str == ':') {
+			/* Make path a null-terminated string */
+			*str = '\0';
+			str++;
+		}
+
+		size = strtoul(str, &endp, 10);
+
+		str = endp;
+
+		if ((*str != 'M') && (*str != 'G')) {
+			size = size / (1024 * 1024);
+		} else if (*str == 'G') {
+		        size = size * 1024;
+			str++;
+		} else {
+		        str++;
+		}
+
+		(*data_file_names)[i] = path;
+		(*data_file_sizes)[i] = size;
+
+	        if (strlen(str) >= ut_strlen(":autoextend")
+	            && 0 == ut_memcmp(str, ":autoextend",
+						ut_strlen(":autoextend"))) {
+
+			*is_auto_extending = TRUE;
+
+			str += ut_strlen(":autoextend");
+
+	        	if (strlen(str) >= ut_strlen(":max:")
+	            		&& 0 == ut_memcmp(str, ":max:",
+						ut_strlen(":max:"))) {
+
+				str += ut_strlen(":max:");
+
+				size = strtoul(str, &endp, 10);
+
+				str = endp;
+
+				if (*str != 'M' && *str != 'G') {
+					size = size / (1024 * 1024);
+				} else if (*str == 'G') {
+		        		size = size * 1024;
+					str++;
+				} else {
+		        		str++;
+				}
+
+				*max_auto_extend_size = size;
+			}
+
+			if (*str != '\0') {
+
+				return(FALSE);
+			}
+		}
+		
+		(*data_file_is_raw_partition)[i] = 0;
+
+	        if (strlen(str) >= 6
+			   && *str == 'n'
+			   && *(str + 1) == 'e' 
+		           && *(str + 2) == 'w') {
+		  	str += 3;
+		  	(*data_file_is_raw_partition)[i] = SRV_NEW_RAW;
+		}
+
+	        if (strlen(str) >= 3
+			   && *str == 'r'
+			   && *(str + 1) == 'a' 
+		           && *(str + 2) == 'w') {
+		 	str += 3;
+		  
+		  	if ((*data_file_is_raw_partition)[i] == 0) {
+		    		(*data_file_is_raw_partition)[i] = SRV_OLD_RAW;
+		  	}		  
+		}
+
+		i++;
+
+		if (*str == ';') {
+			str++;
+		}
+	}
+
+	return(TRUE);
+}
+
+/*************************************************************************
+Splits a ';'-separated list of log group home directories, given in the
+.cnf file, into an array of directory path strings. */
+
+ibool
+srv_parse_log_group_home_dirs(
+/*==========================*/
+					/* out: TRUE if ok, FALSE if parsing
+					error */
+	char*	str,			/* in: character string; every ';' in
+					it gets overwritten with '\0' */
+	char***	log_group_home_dirs)	/* out, own: log group home dirs;
+					the entries point into str */
+{
+	char*	scan;
+	char**	dirs;
+	ulint	n_dirs	= 0;
+	ulint	k;
+
+	/* Pass 1: count the directories in the list path;path;... */
+
+	scan = str;
+
+	while (*scan != '\0') {
+		while (*scan != ';' && *scan != '\0') {
+			scan++;
+		}
+
+		n_dirs++;
+
+		/* Step over the separator, if there is one */
+		if (*scan == ';') {
+			scan++;
+		} else if (*scan != '\0') {
+
+			return(FALSE);
+		}
+	}
+
+	/* One pointer slot for each directory counted above */
+	dirs = (char**) ut_malloc(n_dirs * sizeof(void*));
+	*log_group_home_dirs = dirs;
+
+	/* Pass 2: cut the string at every ';' and record where each
+	directory name begins */
+
+	scan = str;
+
+	for (k = 0; *scan != '\0'; k++) {
+		dirs[k] = scan;
+
+		while (*scan != ';' && *scan != '\0') {
+			scan++;
+		}
+
+		/* Terminate this directory name in place */
+		if (*scan == ';') {
+			*scan = '\0';
+			scan++;
+		}
+	}
+
+	return(TRUE);
+}
+
 /************************************************************************
 I/o-handler thread function. */
 static
@@ -127,7 +429,7 @@ io_handler_thread(
 
 /*************************************************************************
 Normalizes a directory path for Windows: converts slashes to backslashes. */
-static
+
 void
 srv_normalize_path_for_win(
 /*=======================*/
@@ -149,7 +451,7 @@ srv_normalize_path_for_win(
 /*************************************************************************
 Adds a slash or a backslash to the end of a string if it is missing
 and the string is not empty. */
-static
+
 char*
 srv_add_path_separator_if_needed(
 /*=============================*/
@@ -356,6 +658,7 @@ open_or_create_data_files(
 	ibool	one_created	= FALSE;
 	ulint	size;
 	ulint	size_high;
+	ulint	rounded_size_pages;
 	char	name[10000];
 
 	if (srv_n_data_files >= 1000) {
@@ -435,17 +738,35 @@ open_or_create_data_files(
 				ret = os_file_get_size(files[i], &size,
 								&size_high);
 				ut_a(ret);
+				/* Round size downward to megabytes */
 		
-				/* File sizes in srv_... are given in
-				database pages */
-
-				if (size != srv_calc_low32(
-						srv_data_file_sizes[i])
-		    		    || size_high != srv_calc_high32(
-		    		    		srv_data_file_sizes[i])) {
+				rounded_size_pages = (size / (1024 * 1024)
+							+ 4096 * size_high)
+					     << (20 - UNIV_PAGE_SIZE_SHIFT);
+
+				if (i == srv_n_data_files - 1
+				    && srv_auto_extend_last_data_file) {
+
+				    	if (srv_data_file_sizes[i] >
+				    		rounded_size_pages
+				    	   || (srv_last_file_size_max > 0
+				    	      && srv_last_file_size_max <
+				    	       rounded_size_pages)) {
+				    	       	
+						fprintf(stderr,
+			"InnoDB: Error: data file %s is of a different size\n"
+			"InnoDB: than specified in the .cnf file!\n", name);	
+					}
+				    	     
+				    	srv_data_file_sizes[i] =
+				    			rounded_size_pages;
+				}
+				
+				if (rounded_size_pages
+						!= srv_data_file_sizes[i]) {
 
 					fprintf(stderr,
-			"InnoDB: Error: data file %s is of different size\n"
+			"InnoDB: Error: data file %s is of a different size\n"
 			"InnoDB: than specified in the .cnf file!\n", name);
 				
 					return(DB_ERROR);
@@ -479,7 +800,7 @@ open_or_create_data_files(
 				      >> (20 - UNIV_PAGE_SIZE_SHIFT)));
 
 			fprintf(stderr,
-	    "InnoDB: Database physically writes the file full: wait...\n");
+	"InnoDB: Database physically writes the file full: wait...\n");
 
 			ret = os_file_set_size(name, files[i],
 				srv_calc_low32(srv_data_file_sizes[i]),
@@ -681,6 +1002,8 @@ innobase_start_or_create_for_mysql(void)
 	  	os_aio_use_native_aio = TRUE;
 	}
 #endif
+	os_aio_use_native_aio = FALSE;
+	
 	if (!os_aio_use_native_aio) {
 		os_aio_init(4 * SRV_N_PENDING_IOS_PER_THREAD
 						* srv_n_file_io_threads,
@@ -727,12 +1050,10 @@ innobase_start_or_create_for_mysql(void)
 		return(DB_ERROR);
 	}
 
-	if (sizeof(ulint) == 4
-			&& srv_n_log_files * srv_log_file_size >= 262144) {
+	if (srv_n_log_files * srv_log_file_size >= 262144) {
 
 		fprintf(stderr,
-		"InnoDB: Error: combined size of log files must be < 4 GB\n"
-		"InnoDB: on 32-bit computers\n");
+		"InnoDB: Error: combined size of log files must be < 4 GB\n");
 
 		return(DB_ERROR);
 	}
@@ -764,7 +1085,6 @@ innobase_start_or_create_for_mysql(void)
 					&max_flushed_lsn, &max_arch_log_no,
 					&sum_of_new_sizes);
 	if (err != DB_SUCCESS) {
-
 	        fprintf(stderr, "InnoDB: Could not open data files\n");
 
 		return((int) err);
@@ -803,9 +1123,9 @@ innobase_start_or_create_for_mysql(void)
 			    		|| (log_opened && log_created)) {
 				fprintf(stderr, 
 	"InnoDB: Error: all log files must be created at the same time.\n"
-	"InnoDB: If you want bigger or smaller log files,\n"
-	"InnoDB: shut down the database and make sure there\n"
-	"InnoDB: were no errors in shutdown.\n"
+	"InnoDB: All log files must be created also in database creation.\n"
+	"InnoDB: If you want bigger or smaller log files, shut down the\n"
+	"InnoDB: database and make sure there were no errors in shutdown.\n"
 	"InnoDB: Then delete the existing log files. Edit the .cnf file\n"
 	"InnoDB: and start the database again.\n");
 
@@ -841,9 +1161,7 @@ innobase_start_or_create_for_mysql(void)
 
 		mutex_enter(&(log_sys->mutex));
 
-		recv_reset_logs(ut_dulint_align_down(max_flushed_lsn,
-					OS_FILE_LOG_BLOCK_SIZE),
-					max_arch_log_no + 1, TRUE);
+		recv_reset_logs(max_flushed_lsn, max_arch_log_no + 1, TRUE);
 		
 		mutex_exit(&(log_sys->mutex));
 	}
@@ -883,6 +1201,10 @@ innobase_start_or_create_for_mysql(void)
 		
                 srv_startup_is_before_trx_rollback_phase = FALSE;
 
+		/* Initialize the fsp free limit global variable in the log
+		system */
+		fsp_header_get_free_limit(0);
+
 		recv_recovery_from_archive_finish();
 	} else {
 		/* We always try to do a recovery, even if the database had
@@ -899,6 +1221,7 @@ innobase_start_or_create_for_mysql(void)
 
 		/* Since ibuf init is in dict_boot, and ibuf is needed
 		in any disk i/o, first call dict_boot */
+
 		dict_boot();
 		trx_sys_init_at_db_start();
 
@@ -906,6 +1229,11 @@ innobase_start_or_create_for_mysql(void)
 		trx_sys_init_at_db_start */
 
                 srv_startup_is_before_trx_rollback_phase = FALSE;
+
+		/* Initialize the fsp free limit global variable in the log
+		system */
+		fsp_header_get_free_limit(0);
+
 		recv_recovery_from_checkpoint_finish();
 	}
 	
@@ -975,7 +1303,7 @@ innobase_start_or_create_for_mysql(void)
 	if (err != DB_SUCCESS) {
 		return((int)DB_ERROR);
 	}
-
+	
 	/* Create the master thread which monitors the database
 	server, and does purge and other utility operations */
 
diff --git a/innobase/sync/sync0sync.c b/innobase/sync/sync0sync.c
index 3f40aa166ee..144ed263db9 100644
--- a/innobase/sync/sync0sync.c
+++ b/innobase/sync/sync0sync.c
@@ -220,7 +220,7 @@ mutex_create_func(
 	char*		cfile_name,	/* in: file name where created */
 	ulint		cline)		/* in: file line where created */
 {
-#ifdef _WIN32
+#if defined(_WIN32) && defined(UNIV_CAN_USE_X86_ASSEMBLER) 
 	mutex_reset_lock_word(mutex);
 #else	
 	os_fast_mutex_init(&(mutex->os_fast_mutex));
@@ -273,7 +273,7 @@ mutex_free(
 
 	mutex_exit(&mutex_list_mutex);
 
-#ifndef _WIN32
+#if !defined(_WIN32) || !defined(UNIV_CAN_USE_X86_ASSEMBLER) 
 	os_fast_mutex_free(&(mutex->os_fast_mutex));
 #endif
 	/* If we free the mutex protecting the mutex list (freeing is
@@ -1009,7 +1009,7 @@ sync_thread_add_level(
 	} else if (level == SYNC_ANY_LATCH) {
 		ut_a(sync_thread_levels_g(array, SYNC_ANY_LATCH));
 	} else if (level == SYNC_TRX_SYS_HEADER) {
-		ut_a(sync_thread_levels_contain(array, SYNC_KERNEL));
+		ut_a(sync_thread_levels_g(array, SYNC_TRX_SYS_HEADER));
 	} else if (level == SYNC_DOUBLEWRITE) {
 		ut_a(sync_thread_levels_g(array, SYNC_DOUBLEWRITE));
 	} else if (level == SYNC_BUF_BLOCK) {
diff --git a/innobase/trx/trx0sys.c b/innobase/trx/trx0sys.c
index b29ffb4b3bf..32a1db48488 100644
--- a/innobase/trx/trx0sys.c
+++ b/innobase/trx/trx0sys.c
@@ -20,12 +20,43 @@ Created 3/26/1996 Heikki Tuuri
 #include "srv0srv.h"
 #include "trx0purge.h"
 #include "log0log.h"
+#include "os0file.h"
 
 /* The transaction system */
 trx_sys_t*		trx_sys 	= NULL;
 trx_doublewrite_t*	trx_doublewrite = NULL;
 
 /********************************************************************
+Tells whether a page number falls within the doublewrite buffer. */
+
+ibool
+trx_doublewrite_page_inside(
+/*========================*/
+				/* out: TRUE if page_no lies in either of
+				the two blocks of the doublewrite buffer */
+	ulint	page_no)	/* in: page number */
+{
+	/* While trx_doublewrite is still NULL, no page can be inside it */
+	if (trx_doublewrite == NULL) {
+
+		return(FALSE);
+	}
+
+	/* Each block spans TRX_SYS_DOUBLEWRITE_BLOCK_SIZE page numbers */
+	if ((page_no < trx_doublewrite->block1
+	     || page_no >= trx_doublewrite->block1
+					+ TRX_SYS_DOUBLEWRITE_BLOCK_SIZE)
+	    && (page_no < trx_doublewrite->block2
+		|| page_no >= trx_doublewrite->block2
+					+ TRX_SYS_DOUBLEWRITE_BLOCK_SIZE)) {
+
+		return(FALSE);
+	}
+
+	return(TRUE);
+}
+
+/********************************************************************
 Creates or initialializes the doublewrite buffer at a database start. */
 static
 void
@@ -36,6 +67,11 @@ trx_doublewrite_init(
 {
 	trx_doublewrite = mem_alloc(sizeof(trx_doublewrite_t));
 
+	/* When we have the doublewrite buffer in use, we do not need to
+	call os_file_flush (Unix fsync) after every write. */
+	
+	os_do_not_call_flush_at_each_write = TRUE;
+	
 	mutex_create(&(trx_doublewrite->mutex));
 	mutex_set_level(&(trx_doublewrite->mutex), SYNC_DOUBLEWRITE);
 
@@ -402,7 +438,6 @@ trx_sys_update_mysql_binlog_offset(
 	trx_sysf_t*	sys_header;
 	char		namebuf[TRX_SYS_MYSQL_LOG_NAME_LEN];
 	
-	ut_ad(mutex_own(&kernel_mutex));
 	ut_ad(trx->mysql_log_file_name);
 
 	memset(namebuf, ' ', TRX_SYS_MYSQL_LOG_NAME_LEN - 1);
@@ -488,7 +523,7 @@ trx_sys_print_mysql_binlog_offset(void)
 	}
 
 	fprintf(stderr,
-	"InnoDB: Last MySQL binlog file offset %lu %lu, file name %s\n",
+	"InnoDB: Last MySQL binlog file position %lu %lu, file name %s\n",
 		mach_read_from_4(sys_header + TRX_SYS_MYSQL_LOG_INFO
 					+ TRX_SYS_MYSQL_LOG_OFFSET_HIGH),
 		mach_read_from_4(sys_header + TRX_SYS_MYSQL_LOG_INFO
diff --git a/innobase/trx/trx0trx.c b/innobase/trx/trx0trx.c
index 607c80edd09..87b82cbee3a 100644
--- a/innobase/trx/trx0trx.c
+++ b/innobase/trx/trx0trx.c
@@ -71,6 +71,9 @@ trx_create(
 	trx->type = TRX_USER;
 	trx->conc_state = TRX_NOT_STARTED;
 
+	trx->check_foreigns = TRUE;
+	trx->check_unique_secondary = TRUE;
+
 	trx->dict_operation = FALSE;
 
 	trx->mysql_thd = NULL;
@@ -113,6 +116,7 @@ trx_create(
 	trx->lock_heap = mem_heap_create_in_buffer(256);
 	UT_LIST_INIT(trx->trx_locks);
 
+	trx->has_dict_foreign_key_check_lock = FALSE;
 	trx->has_search_latch = FALSE;
 	trx->search_latch_timeout = BTR_SEA_TIMEOUT;
 
@@ -703,8 +707,7 @@ trx_commit_off_kernel(
 
 		/*-------------------------------------*/
 
-		/* Only in some performance tests the variable srv_flush..
-		will be set to FALSE: */
+		/* Most MySQL users run with srv_flush.. set to FALSE: */
 
 		if (srv_flush_log_at_trx_commit) {
 		
diff --git a/innobase/ut/ut0mem.c b/innobase/ut/ut0mem.c
index 79351ff120f..a1320e8b5bc 100644
--- a/innobase/ut/ut0mem.c
+++ b/innobase/ut/ut0mem.c
@@ -121,6 +121,7 @@ ut_malloc(
 {
         return(ut_malloc_low(n, TRUE));
 }
+
 /**************************************************************************
 Frees a memory block allocated with ut_malloc. */
 
diff --git a/innobase/ut/ut0ut.c b/innobase/ut/ut0ut.c
index 964d5bca567..7ee32b9a8e2 100644
--- a/innobase/ut/ut0ut.c
+++ b/innobase/ut/ut0ut.c
@@ -17,6 +17,24 @@ Created 5/11/1994 Heikki Tuuri
 ibool	ut_always_false	= FALSE;
 
 /************************************************************
+Returns the bits of a ulint above the low 32, that is, a >> 32.
+Since there seem to be compiler bugs in both gcc and Visual C++,
+the case of a 4-byte ulint is answered without doing the shift. */
+
+ulint
+ut_get_high32(
+/*==========*/
+			/* out: a >> 32; 0 if ulint has 4 bytes */
+	ulint	a)	/* in: value whose upper half is wanted */
+{
+	if (sizeof(ulint) != 4) {
+
+		return(a >> 32);
+	}
+
+	return(0);
+}
+
+/************************************************************
 The following function returns a clock time in milliseconds. */
 
 ulint
@@ -58,11 +76,11 @@ ut_print_timestamp(
 	FILE*  file) /* in: file where to print */
 {
 #ifdef __WIN__
-  SYSTEMTIME cal_tm;
+  	SYSTEMTIME cal_tm;
 
-  GetLocalTime(&cal_tm);
+  	GetLocalTime(&cal_tm);
 
-  fprintf(file,"%02d%02d%02d %2d:%02d:%02d",
+  	fprintf(file,"%02d%02d%02d %2d:%02d:%02d",
 	  (int)cal_tm.wYear % 100,
 	  (int)cal_tm.wMonth,
 	  (int)cal_tm.wDay,
@@ -70,23 +88,21 @@ ut_print_timestamp(
 	  (int)cal_tm.wMinute,
 	  (int)cal_tm.wSecond);
 #else
+	struct tm  cal_tm;
+  	struct tm* cal_tm_ptr;
+  	time_t     tm;
 
-  struct tm  cal_tm;
-  struct tm* cal_tm_ptr;
-  time_t     tm;
-
-  time(&tm);
+  	time(&tm);
 
 #ifdef HAVE_LOCALTIME_R
-  localtime_r(&tm, &cal_tm);
-  cal_tm_ptr = &cal_tm;
+  	localtime_r(&tm, &cal_tm);
+  	cal_tm_ptr = &cal_tm;
 #else
-  cal_tm_ptr = localtime(&tm);
+  	cal_tm_ptr = localtime(&tm);
 #endif
-
-  fprintf(file,"%02d%02d%02d %2d:%02d:%02d",
+  	fprintf(file,"%02d%02d%02d %2d:%02d:%02d",
 	  cal_tm_ptr->tm_year % 100,
-	  cal_tm_ptr->tm_mon+1,
+	  cal_tm_ptr->tm_mon + 1,
 	  cal_tm_ptr->tm_mday,
 	  cal_tm_ptr->tm_hour,
 	  cal_tm_ptr->tm_min,
@@ -94,6 +110,39 @@ ut_print_timestamp(
 #endif
 }
 
+/**************************************************************
+Returns current year (non-Windows: tm_year = year - 1900), month, day. */
+
+void
+ut_get_year_month_day(
+/*==================*/
+	ulint*	year,	/* out: current year */
+	ulint*	month,	/* out: month */
+	ulint*	day)	/* out: day */
+{
+#ifdef __WIN__
+  	SYSTEMTIME cal_tm;
+
+  	GetLocalTime(&cal_tm);
+
+  	*year = (ulint)cal_tm.wYear;
+  	*month = (ulint)cal_tm.wMonth;
+  	*day = (ulint)cal_tm.wDay;
+#else
+  	struct tm  cal_tm;
+  	struct tm* cal_tm_ptr;
+  	time_t     tm;
+
+  	time(&tm);
+
+  	cal_tm_ptr = localtime(&tm);
+
+  	*year = (ulint)cal_tm_ptr->tm_year;
+  	*month = (ulint)cal_tm_ptr->tm_mon + 1;
+  	*day = (ulint)cal_tm_ptr->tm_mday;
+#endif
+}
+
 /*****************************************************************
 Runs an idle loop on CPU. The argument gives the desired delay
 in microseconds on 100 MHz Pentium + Visual C++. */
author	unknown <monty@bitch.mysql.fi>	2002-06-03 14:43:44 +0300
committer	unknown <monty@bitch.mysql.fi>	2002-06-03 14:43:44 +0300
commit	dddd9084a0ca7ce1e9e60a65ddb3fb8087912249 (patch)
tree	635cb4a7da4d21106255bd614f818e63c5905ede /innobase
parent	c8ca330db07a4739b58800ccc75c097a157417a6 (diff)
parent	7daf5a5d0ee7b52508943c7095a2cc150abcf616 (diff)
download	mariadb-git-dddd9084a0ca7ce1e9e60a65ddb3fb8087912249.tar.gz