87 files changed, 2054 insertions, 1704 deletions
diff --git a/BitKeeper/etc/logging_ok b/BitKeeper/etc/logging_ok index 613acd05cec..e4abb54c59e 100644 --- a/BitKeeper/etc/logging_ok +++ b/BitKeeper/etc/logging_ok @@ -9,3 +9,4 @@ paul@teton.kitebird.com sasha@mysql.sashanet.com serg@serg.mysql.com tim@threads.polyesthetic.msg +tim@white.box diff --git a/Docs/manual.texi b/Docs/manual.texi index 1de38a43aa6..998a2e73c0f 100644 --- a/Docs/manual.texi +++ b/Docs/manual.texi @@ -495,9 +495,9 @@ MySQL Table Types * MERGE:: MERGE tables * ISAM:: ISAM tables * HEAP:: HEAP tables +* InnoDB:: InnoDB tables * BDB:: BDB or Berkeley_db tables * GEMINI:: GEMINI tables -* InnoDB:: InnoDB tables MyISAM Tables @@ -516,38 +516,6 @@ MyISAM table problems. * Corrupted MyISAM tables:: * MyISAM table close:: -BDB or Berkeley_DB Tables - -* BDB overview:: Overview of BDB Tables -* BDB install:: Installing BDB -* BDB start:: BDB startup options -* BDB characteristic:: Some characteristic of @code{BDB} tables: -* BDB TODO:: Some things we need to fix for BDB in the near future: -* BDB portability:: Operating systems supported by @strong{BDB} -* BDB errors:: Errors You May Get When Using BDB Tables - -GEMINI Tables - -* GEMINI Overview:: -* Using GEMINI Tables:: - -GEMINI Overview - -* GEMINI Features:: -* GEMINI Concepts:: -* GEMINI Limitations:: - -Using GEMINI Tables - -* Startup Options:: -* Creating GEMINI Tables:: -* Backing Up GEMINI Tables:: -* Restoring GEMINI Tables:: -* Using Auto_Increment Columns With GEMINI Tables:: -* Performance Considerations:: -* Sample Configurations:: -* When To Use GEMINI Tables:: - InnoDB Tables * InnoDB overview:: InnoDB tables overview @@ -594,6 +562,38 @@ File space management and disk i/o * InnoDB File space:: * InnoDB File Defragmenting:: +BDB or Berkeley_DB Tables + +* BDB overview:: Overview of BDB Tables +* BDB install:: Installing BDB +* BDB start:: BDB startup options +* BDB characteristic:: Some characteristic of @code{BDB} tables: +* BDB TODO:: Some things we need to fix for BDB in the near future: +* BDB portability:: Operating systems supported by @strong{BDB} +* BDB errors:: Errors You May Get When Using BDB Tables + +GEMINI Tables + +* GEMINI Overview:: +* Using GEMINI Tables:: + +GEMINI Overview + +* GEMINI Features:: +* GEMINI Concepts:: +* GEMINI Limitations:: + +Using GEMINI Tables + +* Startup Options:: +* Creating GEMINI Tables:: +* Backing Up GEMINI Tables:: +* Restoring GEMINI Tables:: +* Using Auto_Increment Columns With GEMINI Tables:: +* Performance Considerations:: +* Sample Configurations:: +* When To Use GEMINI Tables:: + MySQL Tutorial * Connecting-disconnecting:: Connecting to and disconnecting from the server @@ -990,7 +990,7 @@ Changes in release 4.0.x (Development; Alpha) Changes in release 3.23.x (Stable) -* News-3.23.39a:: +* News-3.23.40:: Changes in release 3.23.40 * News-3.23.39:: Changes in release 3.23.39 * News-3.23.38:: Changes in release 3.23.38 * News-3.23.37:: Changes in release 3.23.37 @@ -2289,8 +2289,9 @@ The Berkeley DB code is very stable, but we are still improving the interface between @strong{MySQL} and BDB tables, so it will take some time before this is as tested as the other table types. -@item InnoDB Tables -- Alpha -This is a very recent addition to @code{MySQL} and is not very tested yet. +@item InnoDB Tables -- Beta +This is a recent addition to @code{MySQL}. InnoDB tables appear to work +well and can be used after some initial testing.
@item Automatic recovery of MyISAM tables - Beta This only affects the new code that checks if the table was closed properly @@ -3580,7 +3581,7 @@ Note that older versions of @strong{MySQL} are still using a more @uref{http://www.mysql.com/support/arrangements/mypl.html, strict license}. See the documentation for that version for more information. If you need a commercial @strong{MySQL} license, because the GPL license doesn't suit your -application, you can buy one at @uref{https://order.mysql.com/license.htmy}. +application, you can buy one at @uref{https://order.mysql.com/}. For normal internal use, @strong{MySQL} costs nothing. You do not have to pay us if you do not want to. @@ -3645,7 +3646,7 @@ contact us. @xref{Contact information}. If you require a @strong{MySQL} license, the easiest way to pay for it is to use the license form on @strong{MySQL}'s secure server at -@uref{https://order.mysql.com/license.htmy}. Other forms of payment are +@uref{https://order.mysql.com/}. Other forms of payment are discussed in @ref{Payment information}. @cindex copyrights @@ -3922,7 +3923,7 @@ BOX 6434, Torsgatan 21 @end example If you want to pay by credit card over the Internet, you can use -@uref{https://order.mysql.com/license.htmy, MySQL AB's secure license form}. +@uref{https://order.mysql.com/, MySQL AB's secure license form}. You can also print a copy of the license form, fill it in, and send it by fax to: @@ -4741,7 +4742,7 @@ Please report bad or out-of-date mirrors to @email{webmaster@@mysql.com}. @c Added 20000925 @image{Flags/usa} USA [ValueClick, Los Angeles CA] @ @uref{http://mysql.valueclick.com/, WWW} -@uref{ftp://mysql.valueclick.com/mysql/, FTP} +@uref{ftp://mysql.valueclick.com/pub/mysql/Downloads/, FTP} @c @item @c Not ok 20000919; Non-existent (Matt) @@ -6439,6 +6440,7 @@ shell> CXXFLAGS=-DDONT_USE_DEFAULT_FIELDS ./configure @cindex character sets @findex configure option, --with-charset +@findex configure option, --with-extra-charsets @item By default, @strong{MySQL} uses the ISO-8859-1 (Latin1) character set. To change the default set, use the @code{--with-charset} option: @@ -6464,6 +6466,13 @@ indexes may be sorted incorrectly otherwise. (This can happen if you install @strong{MySQL}, create some tables, then reconfigure @strong{MySQL} to use a different character set and reinstall it.) +With the option @code{--with-extra-charsets=LIST} you can define +which additional character sets should be compiled into the server. + +Here @code{LIST} is either a list of character sets separated by spaces, +@code{complex} to include all character sets that can't be dynamically +loaded, or @code{all} to include all character sets into the binaries. + @item To configure @strong{MySQL} with debugging code, use the @code{--with-debug} option: @@ -6483,8 +6492,8 @@ applications. @xref{Thread-safe clients}. @item Options that pertain to particular systems can be found in the -system-specific sections later in this chapter. -@xref{Source install system issues}. +system-specific sections later in this chapter. @xref{Source install +system issues}. @end itemize @node Installing source tree, Compilation problems, Installing source, Installing @@ -7327,6 +7336,10 @@ with @code{--static}.
If you try to do so, you will get the error: @example ld: fatal: library -ldl: not found + +or + +undefined reference to `dlopen' @end example If too many processes try to connect very rapidly to @code{mysqld}, you will @@ -7552,6 +7565,40 @@ relative to the root of @code{glibc} Note that @strong{MySQL} will not be stable with around 600-1000 connections if @code{STACK_SIZE} is the default of 2 MB. +If @strong{MySQL} can't open enough files or connections, it may be that +you haven't configured Linux to handle enough files. + +In Linux 2.2 and later, you can check the number of allocated +file handles by doing: + +@example +cat /proc/sys/fs/file-max +cat /proc/sys/fs/dquot-max +cat /proc/sys/fs/super-max +@end example + +If you have more than 16M of memory, you should add something like the +following in your boot script (@file{/etc/rc/boot.local} on SuSE): + +@example +echo 65536 > /proc/sys/fs/file-max +echo 8192 > /proc/sys/fs/dquot-max +echo 1024 > /proc/sys/fs/super-max +@end example + +You can also run the above from the command line as root, but in this case +the old limits will be restored the next time your computer reboots. + +You should also add the following to @file{/etc/my.cnf}: + +@example +[safe_mysqld] +open_files_limit=8192 +@end example + +The above should allow @strong{MySQL} to create up to 8192 connections plus +open files. + The @code{STACK_SIZE} constant in LinuxThreads controls the spacing of thread stacks in the address space. It needs to be large enough so that there will be plenty of room for the stack of each individual thread, but small enough @@ -8218,6 +8265,17 @@ CC=gcc CXX=gcc CXXFLAGS=-O3 \ ./configure --prefix=/usr/local/mysql --with-thread-safe-client --with-named-thread-libs=-lpthread @end example +On Irix 6.5.11 with native Irix C and C++ compilers ver. 7.3.1.2, the +following is reported to work: + +@example +CC=cc CXX=CC CFLAGS='-O3 -n32 -TARG:platform=IP22 -I/usr/local/include \ +-L/usr/local/lib' CXXFLAGS='-O3 -n32 -TARG:platform=IP22 \ +-I/usr/local/include -L/usr/local/lib' ./configure --prefix=/usr/local/mysql \ +--with-berkeley-db --with-innodb \ +--with-libwrap=/usr/local --with-named-curses-libs=/usr/local/lib/libncurses.a +@end example + @node FreeBSD, NetBSD, SGI-Irix, Source install system issues @subsection FreeBSD Notes @@ -9018,10 +9076,11 @@ course. For example, using @code{mysqld-opt} under NT will not allow named pipe connections. You should use either @code{mysqld-nt} or @code{mysqld-max-nt}.) -If @code{mysqld} doesn't start, please check whether or not the -@file{\mysql\data\mysql.err} file contains any reason for this. You can also -try to start the server with @code{mysqld --standalone}; In this case, you may -get some useful information on the screen that may help solve the problem. +If @code{mysqld} doesn't start, please check the +@file{\mysql\data\mysql.err} file to see if the server wrote any message +there to indicate the cause of the problem. You can also try to start +the server with @code{mysqld --standalone}; in this case, you may get +some useful information on the screen that may help solve the problem. The last option is to start @code{mysqld} with @code{--standalone --debug}. In this case @code{mysqld} will write a log file @@ -9053,9 +9112,9 @@ or C:\> C:\mysql\bin\mysqld-max-nt --install @end example -(You can also use @code{mysqld} binaries that don't end with -@code{-nt.exe} on NT, but those cannot be started as a service or use -named pipes.)
+(Under Windows NT, you can actually install any of the server binaries +as a service, but only those having names that end with @code{-nt.exe} +provide support for named pipes.) You can start and stop the @strong{MySQL} service with these commands: @@ -9466,11 +9525,6 @@ For the moment, the workaround is to list the parameters in the @file{C:\my.cnf} file instead. @item -When you suspend a laptop running Win95, the @code{mysqld} daemon doesn't -accept new connections when the laptop is resumed. We don't know if this -is a problem with Win95, TCP/IP, or @strong{MySQL}. - -@item It would be real nice to be able to kill @code{mysqld} from the task manager. For the moment, you must use @code{mysqladmin shutdown}. @@ -9574,11 +9628,8 @@ and are configured with the following compilers and options: @item SunOS 4.1.4 2 sun4c with @code{gcc} 2.7.2.1 @code{CC=gcc CXX=gcc CXXFLAGS="-O3 -felide-constructors" ./configure --prefix=/usr/local/mysql --disable-shared --with-extra-charsets=complex --enable-assembler} -@item SunOS 5.5.1 sun4u with @code{egcs} 1.0.3a -@code{CC=gcc CFLAGS="-O3" CXX=gcc CXXFLAGS="-O3 -felide-constructors -fno-exceptions -fno-rtti" ./configure --prefix=/usr/local/mysql --with-low-memory --with-extra-charsets=complex} - -@item SunOS 5.6 sun4u with @code{egcs} 2.90.27 -@code{CC=gcc CFLAGS="-O3" CXX=gcc CXXFLAGS="-O3 -felide-constructors -fno-exceptions -fno-rtti" ./configure --prefix=/usr/local/mysql --with-low-memory --with-extra-charsets=complex} +@item SunOS 5.5.1 (and above) sun4u with @code{egcs} 1.0.3a, 2.90.27, or @code{gcc} 2.95.2 and newer +@code{CC=gcc CFLAGS="-O3" CXX=gcc CXXFLAGS="-O3 -felide-constructors -fno-exceptions -fno-rtti" ./configure --prefix=/usr/local/mysql --with-low-memory --with-extra-charsets=complex --enable-assembler} @item SunOS 5.6 i86pc with @code{gcc} 2.8.1 @code{CC=gcc CXX=gcc CXXFLAGS=-O3 ./configure --prefix=/usr/local/mysql --with-low-memory --with-extra-charsets=complex} @@ -10159,15 +10210,15 @@ library and for which @strong{MySQL} must be configured to use MIT-pthreads. If you can't get @code{mysqld} to start you can try to make a trace file to find the problem. @xref{Making trace files}. +If you are using InnoDB tables, refer to the InnoDB-specific startup +options. @xref{InnoDB start}. + If you are using BDB (Berkeley DB) tables, you should familiarize yourself with the different BDB specific startup options. @xref{BDB start}. If you are using Gemini tables, refer to the Gemini-specific startup options. @xref{Using GEMINI Tables}. -If you are using InnoDB tables, refer to the InnoDB-specific startup -options. @xref{InnoDB start}. - @node Automatic start, Command-line options, Starting server, Post-installation @subsection Starting and Stopping MySQL Automatically @cindex starting, the server automatically @@ -10499,6 +10550,9 @@ Run @code{mysqld} daemon as user @code{user_name}. This option is @item -V, --version Output version information and exit. +@item -W, --warnings +Print out warnings like @code{Aborted connection...} to the @code{.err} file. +@xref{Communication errors}. @end table @cindex default options @@ -16655,6 +16709,16 @@ mysql> select 'David_' LIKE 'David|_' ESCAPE '|'; -> 1 @end example +The following two statements illustrate that string comparisons are +case insensitive unless one of the operands is a binary string: + +@example +mysql> select 'abc' LIKE 'ABC'; + -> 1 +mysql> select 'abc' LIKE BINARY 'ABC'; + -> 0 +@end example + @code{LIKE} is allowed on numeric expressions!
(This is a @strong{MySQL} extension to the ANSI SQL @code{LIKE}.) @@ -19406,9 +19470,6 @@ INDEX} are @strong{MySQL} extensions to ANSI SQL92. @code{MODIFY} is an Oracle extension to @code{ALTER TABLE}. @item -@code{TRUNCATE} is an Oracle extension. @xref{TRUNCATE}. - -@item The optional word @code{COLUMN} is a pure noise word and can be omitted. @item @@ -19469,8 +19530,7 @@ a table row. The default is to add the column last. or removes the old default value. If the old default is removed and the column can be @code{NULL}, the new default is @code{NULL}. If the column cannot be @code{NULL}, @strong{MySQL} -assigns a default value. -Default value assignment is described in +assigns a default value, as described in @ref{CREATE TABLE, , @code{CREATE TABLE}}. @findex DROP INDEX @@ -19483,6 +19543,10 @@ If columns are dropped from a table, the columns are also removed from any index of which they are a part. If all columns that make up an index are dropped, the index is dropped as well. +@item +If a table contains only one column, the column cannot be dropped. +If what you intend is to remove the table, use @code{DROP TABLE} instead. + @findex DROP PRIMARY KEY @item @code{DROP PRIMARY KEY} drops the primary index. If no such @@ -19791,6 +19855,8 @@ minimum needed to restore it. Currenlty only works for @code{MyISAM} tables. For @code{MyISAM} table, copies @code{.frm} (definition) and @code{.MYD} (data) files. The index file can be rebuilt from those two. +Before using this command, please see @ref{Backup}. + During the backup, read lock will be held for each table, one at time, as they are being backed up. If you want to backup several tables as a snapshot, you must first issue @code{LOCK TABLES} obtaining a read @@ -19999,6 +20065,8 @@ valid, the table can be re-created this way, even if the data or index files have become corrupted. @end itemize +@code{TRUNCATE} is an Oracle SQL extension. + @findex SELECT @node SELECT, JOIN, TRUNCATE, Reference @section @code{SELECT} Syntax @@ -20619,9 +20687,9 @@ Version 3.22.15. It is a @strong{MySQL} extension to ANSI SQL92. @code{INSERT DELAYED} only works with @code{ISAM} and @code{MyISAM} tables. Note that as @code{MyISAM} tables supports concurrent -@code{SELECT} and @code{INSERT}, if there is no empty blocks in the data -file, you very seldom need to use @code{INSERT DELAYED} with -@code{MyISAM}. +@code{SELECT} and @code{INSERT}, if there are no free blocks in the +middle of the data file, you very seldom need to use @code{INSERT +DELAYED} with @code{MyISAM}. @xref{MyISAM}. When you use @code{INSERT DELAYED}, the client will get an OK at once and the row will be inserted when the table is not in use by any other thread. @@ -20754,6 +20822,13 @@ In other words, you can't access the values of the old row from a @code{REPLACE} statement. In some old @strong{MySQL} version it looked like you could do this, but that was a bug that has been corrected. +When you use a @code{REPLACE} command, @code{mysql_affected_rows()} +will return 2 if the new row replaced an old row. This is because one +row was inserted and then the duplicate was deleted. + +This makes it easy to check whether @code{REPLACE} added a row or +replaced one. + @findex LOAD DATA INFILE @node LOAD DATA, UPDATE, REPLACE, Reference @section @code{LOAD DATA INFILE} Syntax @@ -22742,8 +22817,6 @@ This statement is provided for Oracle compatibility. The @code{SHOW} statement provides similar information. @xref{SHOW, , @code{SHOW}}.
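+For example, assuming a table @code{test} with a @code{PRIMARY KEY} on
+@code{id} (the table and values here are only illustrative), the second
+@code{REPLACE} reports two affected rows:
+
+@example
+mysql> REPLACE INTO test (id, name) VALUES (1,'old');
+Query OK, 1 row affected (0.00 sec)
+
+mysql> REPLACE INTO test (id, name) VALUES (1,'new');
+Query OK, 2 rows affected (0.00 sec)
+@end example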
- - @findex BEGIN @findex COMMIT @findex ROLLBACK @@ -23664,9 +23737,9 @@ of both worlds. * MERGE:: MERGE tables * ISAM:: ISAM tables * HEAP:: HEAP tables +* InnoDB:: InnoDB tables * BDB:: BDB or Berkeley_db tables * GEMINI:: GEMINI tables -* InnoDB:: InnoDB tables @end menu @node MyISAM, MERGE, Table types, Table types @@ -23690,8 +23763,12 @@ the table was closed correctly. If @code{mysqld} is started with @code{--myisam-recover}, @code{MyISAM} tables will automatically be checked and/or repaired on open if the table wasn't closed properly. @item -You can @code{INSERT} new rows in a table without deleted rows, -while other threads are reading from the table. +You can @code{INSERT} new rows into a table that doesn't have free blocks +in the middle of the data file while other threads are reading from the +table (concurrent insert). A free block can arise when a dynamic-length +row is updated to hold less data than before, or when rows are deleted. +When all free blocks are used up, all future inserts will be concurrent +again. @item Support for big files (63-bit) on filesystems/operating systems that support big files. @@ -24417,1137 +24494,6 @@ SUM_OVER_ALL_KEYS(max_length_of_key + sizeof(char*) * 2) @code{sizeof(char*)} is 4 on 32-bit machines and 8 on 64-bit machines. -@cindex tables, @code{BDB} -@cindex tables, @code{Berkeley DB} -@node BDB, GEMINI, HEAP, Table types -@section BDB or Berkeley_DB Tables - -@menu -* BDB overview:: Overview of BDB Tables -* BDB install:: Installing BDB -* BDB start:: BDB startup options -* BDB characteristic:: Some characteristic of @code{BDB} tables: -* BDB TODO:: Some things we need to fix for BDB in the near future: -* BDB portability:: Operating systems supported by @strong{BDB} -* BDB errors:: Errors You May Get When Using BDB Tables -@end menu - -@node BDB overview, BDB install, BDB, BDB -@subsection Overview of BDB Tables - -Support for BDB tables is included in the @strong{MySQL} source distribution -starting from Version 3.23.34 and is activated in the @strong{MySQL}-Max -binary. - -BerkeleyDB, available at @uref{http://www.sleepycat.com/} has provided -@strong{MySQL} with a transactional table handler. By using BerkeleyDB -tables, your tables may have a greater chance of surviving crashes, and also -provides @code{COMMIT} and @code{ROLLBACK} on transactions. The -@strong{MySQL} source distribution comes with a BDB distribution that has a -couple of small patches to make it work more smoothly with @strong{MySQL}. -You can't use a non-patched @code{BDB} version with @strong{MySQL}. - -We at @strong{MySQL AB} are working in close cooperation with Sleepycat to -keep the quality of the @strong{MySQL}/BDB interface high. - -When it comes to supporting BDB tables, we are committed to help our -users to locate the problem and help creating a reproducable test case -for any problems involving BDB tables. Any such test case will be -forwarded to Sleepycat who in turn will help us find and fix the -problem. As this is a two stage operation, any problems with BDB tables -may take a little longer for us to fix than for other table handlers. -However, as the BerkeleyDB code itself has been used by many other -applications than @strong{MySQL}, we don't envision any big problems with -this. @xref{Table handler support}.
- -@node BDB install, BDB start, BDB overview, BDB -@subsection Installing BDB - -If you have downloaded a binary version of @strong{MySQL} that includes -support for BerkeleyDB, simply follow the instructions for installing a -binary version of @strong{MySQL}. -@xref{Installing binary}. @xref{mysqld-max, , @code{mysqld-max}}. - -To compile @strong{MySQL} with Berkeley DB support, download @strong{MySQL} -Version 3.23.34 or newer and configure @code{MySQL} with the -@code{--with-berkeley-db} option. @xref{Installing source}. - -@example -cd /path/to/source/of/mysql-3.23.34 -./configure --with-berkeley-db -@end example - -Please refer to the manual provided with the @code{BDB} distribution for -more updated information. - -Even though Berkeley DB is in itself very tested and reliable, -the @strong{MySQL} interface is still considered beta quality. -We are actively improving and optimizing it to get it stable very -soon. - -@node BDB start, BDB characteristic, BDB install, BDB -@subsection BDB startup options - -If you are running with @code{AUTOCOMMIT=0} then your changes in @code{BDB} -tables will not be updated until you execute @code{COMMIT}. Instead of commit -you can execute @code{ROLLBACK} to forget your changes. @xref{COMMIT}. - -If you are running with @code{AUTOCOMMIT=1} (the default), your changes -will be committed immediately. You can start an extended transaction with -the @code{BEGIN WORK} SQL command, after which your changes will not be -committed until you execute @code{COMMIT} (or decide to @code{ROLLBACK} -the changes). - -The following options to @code{mysqld} can be used to change the behavior of -BDB tables: - -@multitable @columnfractions .30 .70 -@item @strong{Option} @tab @strong{Meaning} -@item @code{--bdb-home=directory} @tab Base directory for BDB tables. This should be the same directory you use for --datadir. -@item @code{--bdb-lock-detect=#} @tab Berkeley lock detect. One of (DEFAULT, OLDEST, RANDOM, or YOUNGEST). -@item @code{--bdb-logdir=directory} @tab Berkeley DB log file directory. -@item @code{--bdb-no-sync} @tab Don't synchronously flush logs. -@item @code{--bdb-no-recover} @tab Don't start Berkeley DB in recover mode. -@item @code{--bdb-shared-data} @tab Start Berkeley DB in multi-process mode (Don't use @code{DB_PRIVATE} when initializing Berkeley DB) -@item @code{--bdb-tmpdir=directory} @tab Berkeley DB tempfile name. -@item @code{--skip-bdb} @tab Don't use berkeley db. -@item @code{-O bdb_max_lock=1000} @tab Set the maximum number of locks possible. @xref{SHOW VARIABLES}. -@end multitable - -If you use @code{--skip-bdb}, @strong{MySQL} will not initialize the -Berkeley DB library and this will save a lot of memory. Of course, -you cannot use @code{BDB} tables if you are using this option. - -Normally you should start @code{mysqld} without @code{--bdb-no-recover} if you -intend to use BDB tables. This may, however, give you problems when you -try to start @code{mysqld} if the BDB log files are corrupted. @xref{Starting -server}. - -With @code{bdb_max_lock} you can specify the maximum number of locks -(10000 by default) you can have active on a BDB table. You should -increase this if you get errors of type @code{bdb: Lock table is out of -available locks} or @code{Got error 12 from ...} when you have do long -transactions or when @code{mysqld} has to examine a lot of rows to -calculate the query. - -You may also want to change @code{binlog_cache_size} and -@code{max_binlog_cache_size} if you are using big multi-line transactions. -@xref{COMMIT}. 
- -@node BDB characteristic, BDB TODO, BDB start, BDB -@subsection Some characteristic of @code{BDB} tables: - -@itemize @bullet -@item -To be able to rollback transactions BDB maintain log files. For maximum -performance you should place these on another disk than your databases -by using the @code{--bdb_log_dir} options. -@item -@strong{MySQL} performs a checkpoint each time a new BDB log -file is started, and removes any log files that are not needed for -current transactions. One can also run @code{FLUSH LOGS} at any time -to checkpoint the Berkeley DB tables. - -For disaster recovery, one should use table backups plus -@strong{MySQL}'s binary log. @xref{Backup}. - -@strong{Warning}: If you delete old log files that are in use, BDB will -not be able to do recovery at all and you may loose data if something -goes wrong. -@item -@strong{MySQL} requires a @code{PRIMARY KEY} in each BDB table to be -able to refer to previously read rows. If you don't create one, -@strong{MySQL} will create an maintain a hidden @code{PRIMARY KEY} for -you. The hidden key has a length of 5 bytes and is incremented for each -insert attempt. -@item -If all columns you access in a @code{BDB} table are part of the same index or -part of the primary key, then @strong{MySQL} can execute the query -without having to access the actual row. In a @code{MyISAM} table the -above holds only if the columns are part of the same index. -@item -The @code{PRIMARY KEY} will be faster than any other key, as the -@code{PRIMARY KEY} is stored together with the row data. As the other keys are -stored as the key data + the @code{PRIMARY KEY}, it's important to keep the -@code{PRIMARY KEY} as short as possible to save disk and get better speed. -@item -@code{LOCK TABLES} works on @code{BDB} tables as with other tables. If -you don't use @code{LOCK TABLE}, @strong{MYSQL} will issue an internal -multiple-write lock on the table to ensure that the table will be -properly locked if another thread issues a table lock. -@item -Internal locking in @code{BDB} tables is done on page level. -@item -@code{SELECT COUNT(*) FROM table_name} is slow as @code{BDB} tables doesn't -maintain a count of the number of rows in the table. -@item -Scanning is slower than with @code{MyISAM} tables as one has data in BDB -tables stored in B-trees and not in a separate data file. -@item -The application must always be prepared to handle cases where -any change of a @code{BDB} table may make an automatic rollback and any -read may fail with a deadlock error. -@item -Keys are not compressed to previous keys as with ISAM or MyISAM -tables. In other words, the key information will take a little more -space in @code{BDB} tables compared to MyISAM tables which don't use -@code{PACK_KEYS=0}. -@item -There is often holes in the BDB table to allow you to insert new rows in -the middle of the key tree. This makes BDB tables somewhat larger than -MyISAM tables. -@item -The optimizer needs to know an approximation of the number of rows in -the table. @strong{MySQL} solves this by counting inserts and -maintaining this in a separate segment in each BDB table. If you don't -do a lot of @code{DELETE} or @code{ROLLBACK}:s this number should be -accurate enough for the @strong{MySQL} optimizer, but as @strong{MySQL} -only store the number on close, it may be wrong if @strong{MySQL} dies -unexpectedly. It should not be fatal even if this number is not 100 % -correct. One can update the number of rows by executing @code{ANALYZE -TABLE} or @code{OPTIMIZE TABLE}. 
@xref{ANALYZE TABLE} . @xref{OPTIMIZE -TABLE}. -@item -If you get full disk with a @code{BDB} table, you will get an error -(probably error 28) and the transaction should roll back. This is in -contrast with @code{MyISAM} and @code{ISAM} tables where @code{mysqld} will -wait for enough free disk before continuing. -@end itemize - -@node BDB TODO, BDB portability, BDB characteristic, BDB -@subsection Some things we need to fix for BDB in the near future: - -@itemize @bullet -@item -It's very slow to open many BDB tables at the same time. If you are -going to use BDB tables, you should not have a very big table cache (> -256 ?) and you should use @code{--no-auto-rehash} with the @code{mysql} -client. We plan to partly fix this in 4.0. -@item -@code{SHOW TABLE STATUS} doesn't yet provide that much information for BDB -tables. -@item -Optimize performance. -@item -Change to not use page locks at all when we are scanning tables. -@end itemize - -@node BDB portability, BDB errors, BDB TODO, BDB -@subsection Operating systems supported by @strong{BDB} - -If you after having built @strong{MySQL} with support for BDB tables get -the following error in the log file when you start @code{mysqld}: - -@example -bdb: architecture lacks fast mutexes: applications cannot be threaded -Can't init dtabases -@end example - -This means that @code{BDB} tables are not supported for your architecture. -In this case you have to rebuild @strong{MySQL} without BDB table support. - -NOTE: The following list is not complete; We will update this as we get -more information about this. - -Currently we know that BDB tables works with the following operating -system. - -@itemize @bullet -@item -Linux 2.x intel -@item -Solaris sparc -@item -SCO OpenServer -@item -SCO UnixWare 7.0.1 -@end itemize - -It doesn't work with the following operating systems: - -@itemize @bullet -@item -Linux 2.x Alpha -@item -Max OS X -@end itemize - -@node BDB errors, , BDB portability, BDB -@subsection Errors You May Get When Using BDB Tables - -@itemize @bullet -@item -If you get the following error in the @code{hostname.err log} when -starting @code{mysqld}: - -@example -bdb: Ignoring log file: .../log.XXXXXXXXXX: unsupported log version # -@end example -it means that the new @code{BDB} version doesn't support the old log -file format. In this case you have to delete all @code{BDB} log BDB -from your database directory (the files that has the format -@code{log.XXXXXXXXXX} ) and restart @code{mysqld}. We would also -recommend you to do a @code{mysqldump --opt} of your old @code{BDB} -tables, delete the old table and restore the dump. -@item -If you are running in not @code{auto_commit} mode and delete a table you -are using by another thread you may get the following error messages in -the @strong{MySQL} error file: - -@example -001119 23:43:56 bdb: Missing log fileid entry -001119 23:43:56 bdb: txn_abort: Log undo failed for LSN: 1 3644744: Invalid -@end example - -This is not fatal but we don't recommend that you delete tables if you are -not in @code{auto_commit} mode, until this problem is fixed (the fix is -not trivial). -@end itemize - -@cindex GEMINI tables -@node GEMINI, InnoDB, BDB, Table types -@section GEMINI Tables - -@cindex GEMINI tables, overview -@menu -* GEMINI Overview:: -* Using GEMINI Tables:: -@end menu - -@node GEMINI Overview, Using GEMINI Tables, GEMINI, GEMINI -@subsection GEMINI Overview - -@code{GEMINI} is a transaction-safe table handler for @strong{MySQL}. 
It -provides row-level locking, robust transaction support and reliable -crash recovery. It is targeted for databases that need to handle heavy -multi-user updates typical of transaction processing applications while -still providing excellent performance for read-intensive operations. The -@code{GEMINI} table type is developed and supported by NuSphere -Corporation (see @url{http://www.nusphere.com}). - -@code{GEMINI} provides full ACID transaction properties (Atomic, -Consistent, Independent, and Durable) with a programming model that -includes support for statement atomicity and all four standard isolation -levels (Read Uncommitted, Read Committed, Repeatable Read, and -Serializable) defined in the SQL standard. - -The @code{GEMINI} tables support row-level and table-level locking to -increase concurrency in applications and allow reading of tables without -locking for maximum concurrency in a heavy update environment. The -transaction, locking, and recovery mechanisms are tightly integrated to -eliminate unnecessary administration overhead. - -In general, if @code{GEMINI} tables are selected for an application, it -is recommended that all tables updated in the application be -@code{GEMINI} tables to provide well-defined system behavior. If -non-@code{GEMINI} tables are mixed into the application then, ACID -transaction properties cannot be maintained. While there are clearly -cases where mixing table types is appropriate, it should always be done -with careful consideration of the impact on transaction consistency and -recoverability needs of the application and underlying database. - -The @code{GEMINI} table type is derived from a successful commercial -database and uses the storage kernel technology tightly integrated with -@strong{MySQL} server. The basic @code{GEMINI} technology is in use by -millions of users worldwide in production environments today. This -maturity allows @code{GEMINI} tables to provide a solution for those -users who require transaction-based behavior as part of their -applications. - -The @code{GEMINI} table handler supports a configurable data cache that -allows a significant portion of any database to be maintained in memory -while still allowing durable updates. - -@cindex GEMINI tables, features -@menu -* GEMINI Features:: -* GEMINI Concepts:: -* GEMINI Limitations:: -@end menu - -@node GEMINI Features, GEMINI Concepts, GEMINI Overview, GEMINI Overview -@subsubsection GEMINI Features - -The following summarizes the major features provided by @code{GEMINI} -tables. - -@itemize @bullet -@item -Supports all optimization statistics used by the @strong{MySQL} optimizer -including table cardinality, index range estimates and multi-component -selectivity to insure optimal query performance. - -@item -Maintains exact cardinality information for each table so @code{SELECT -COUNT(*) FROM} table-name always returns an answer immediately. - -@item -Supports index-only queries; when index data is sufficient to resolve a -query no record data is read (for non character types). - -@item -@code{GEMINI} uses block based I/O for better performance. There is no -performance penalty for using @code{VARCHAR} fields. The maximum record size is -currently 32K. - -@item -The number of rows in a single @code{GEMINI} table can be 4 quintillion -(full use of 64 bits). - -@item -Individual tables can be as large as 16 petabytes. - -@item -Locking is done at a record or row level rather than at table level -unless table locks are explicitly requested. 
When a row is inserted into -a table, other rows can be updated, inserted or deleted without waiting -for the inserted row to be committed. - -@item -Provides durable transactions backed by a crash recovery mechanism that -returns the database to a known consistent state in the event of an -unexpected failure. - -@item -Support for all isolation levels and statement atomicity defined in the -SQL standard. - -@item -Reliable Master Replication; the master database can survive system -failure and recover all committed transactions. -@end itemize - -@cindex GEMINI tables, concepts -@node GEMINI Concepts, GEMINI Limitations, GEMINI Features, GEMINI Overview -@subsubsection GEMINI Concepts - -This section highlights some of the important concepts behind -@code{GEMINI} and the @code{GEMINI} programming model, including: - -@itemize @bullet -@item -ACID Transactions -@item -Transaction COMMIT/ROLLBACK -@item -Statement Atomicity -@item -Recovery -@item -Isolation Levels -@item -Row-Level Locking -@end itemize - -These features are described below. - -@cindex GEMINI tables, ACID transactions -@noindent -@strong{ACID Transactions} - -ACID in the context of transactions is an acronym which stands for -@emph{Atomicity}, @emph{Consistency}, @emph{Isolation}, @emph{Durability}. - -@multitable @columnfractions .25 .75 -@item @sc{Attribute} @tab @sc{Description} -@item -@strong{Atomicity} -@tab A transaction allows for the grouping of one or more changes to -tables and rows in the database to form an atomic or indivisible -operation. That is, either all of the changes occur or none of them -do. If for any reason the transaction cannot be completed, everything -this transaction changed can be restored to the state it was in prior to -the start of the transaction via a rollback operation. - -@item -@strong{Consistency} -@tab -Transactions always operate on a consistent view of the data and when -they end always leave the data in a consistent state. Data may be said to -be consistent as long as it conforms to a set of invariants, such as no -two rows in the customer table have the same customer ID and all orders -have an associated customer row. While a transaction executes, these -invariants may be violated, but no other transaction will be allowed to -see these inconsistencies, and all such inconsistencies will have been -eliminated by the time the transaction ends. - -@item -@strong{Isolation} -@tab To a given transaction, it should appear as though it is running -all by itself on the database. The effects of concurrently running -transactions are invisible to this transaction, and the effects of this -transaction are invisible to others until the transaction is committed. - -@item -@strong{Durability} -@tab Once a transaction is committed, its effects are guaranteed to -persist even in the event of subsequent system failures. Until the -transaction commits, not only are any changes made by that transaction -not durable, but are guaranteed not to persist in the face of a system -failures, as crash recovery will rollback their effects. -@end multitable - -@cindex GEMINI tables, COMMIT/ROLLBACK -@noindent -@strong{Transaction COMMIT/ROLLBACK} - -As stated above, a transaction is a group of work being done to -data. Unless otherwise directed, @strong{MySQL} considers each statement -a transaction in itself. Multiple updates can be accomplished by placing -them in a single statement, however they are limited to a single table. - -Applications tend to require more robust use of transaction -concepts. 
Take, for example, a system that processes an order: A row may -be inserted in an order table, additional rows may be added to an -order-line table, updates may be made to inventory tables, etc. It is -important that if the order completes, all the changes are made to all -the tables involved; likewise if the order fails, none of the changes to -the tables must occur. To facilitate this requirement, @strong{MySQL} -has syntax to start a transaction called @code{BEGIN WORK}. All -statements that occur after the @code{BEGIN WORK} statement are grouped -into a single transaction. The end of this transaction occurs when a -@code{COMMIT} or @code{ROLLBACK} statement is encountered. After the -@code{COMMIT} or @code{ROLLBACK} the system returns back to the behavior -before the @code{BEGIN WORK} statement was encountered where every -statement is a transaction. - -To permanently turn off the behavior where every statement is a -transaction, @strong{MySQL} added a variable called -@code{AUTOCOMMIT}. The @code{AUTOCOMMIT} variable can have two values, -@code{1} and @code{0}. The mode where every statement is a transaction -is when @code{AUTOCOMMIT} is set to @code{1} (@code{AUTOCOMMIT=1}). When -@code{AUTOCOMMIT} is set to @code{0} (@code{AUTOCOMMIT=0}), then every -statement is part of the same transaction until the transaction end by -either @code{COMMIT} or @code{ROLLBACK}. Once a transaction completes, a -new transaction is immediately started and the process repeats. - -Here is an example of the SQL statements that you may find in a typical -order: - -@example -BEGIN WORK; - INSERT INTO order VALUES ...; - INSERT INTO order-lines VALUES ...; - INSERT INTO order-lines VALUES ...; - INSERT INTO order-lines VALUES ...; - UPDATE inventory WHERE ...; -COMMIT; -@end example - -This example shows how to use the @code{BEGIN WORK} statement to start a -transaction. If the variable @code{AUTOCOMMIT} is set to @code{0}, then -a transaction would have been started already. In this case, the -@code{BEGIN WORK} commits the current transaction and starts a new one. - -@cindex GEMINI tables, statement atomicity -@noindent -@strong{Statement Atomicity} - -As mentioned above, when running with @code{AUTOCOMMIT} set to @code{1}, -each statement executes as a single transaction. When a statement has an -error, then all changes make by the statement must be -undone. Transactions support this behavior. Non-transaction safe table -handlers would have a partial statement update where some of the changes -from the statement would be contained in the database and other changes -from the statement would not. Work would need to be done to manually -recover from the error. - -@cindex GEMINI tables, recovery -@noindent -@strong{Recovery} - -Transactions are the basis for database recovery. Recovery is what -supports the Durability attribute of the ACID transaction. - -@code{GEMINI} uses a separate file called the Recovery Log located in -the @code{$DATADIR} directory named @code{gemini.rl}. This file -maintains the integrity of all the @code{GEMINI} tables. @code{GEMINI} -can not recover any data from non-@code{GEMINI} tables. In addition, the -@code{gemini.rl} file is used to rollback transactions in support of the -@code{ROLLBACK} statement. - -In the event of a system failure, the next time the @strong{MySQL} -server is started, @code{GEMINI} will automatically go through its -crash recovery process. 
The result of crash recovery is that all the -@code{GEMINI} tables will contain the latest changes made to them, and -all transactions that were open at the time of the crash will have been -rolled back. - -The @code{GEMINI} Recovery Log reuses space when it can. Space can be -reused when information in the Recovery Log is no longer needed for -crash recovery or rollback. - -@cindex GEMINI tables, isolation levels -@noindent -@strong{Isolation Levels} - -There are four isolation levels supported by @code{GEMINI}: - -@itemize @bullet -@item -READ UNCOMMITTED -@item -READ COMMITTED -@item -REPEATABLE READ -@item -SERIALIZABLE -@end itemize - -These isolation levels apply only to shared locks obtained by select -statements, excluding select for update. Statements that get exclusive -locks always retain those locks until the transaction commits or rolls -back. - -By default, @code{GEMINI} operates at the @code{READ COMMITTED} -level. You can override the default using the following command: - -@example -SET [GLOBAL | SESSION] TRANSACTION ISOLATION LEVEL [READ UNCOMMITTED | -READ COMMITTED | REPEATABLE READ | SERIALIZABLE ] -@end example - -If the @code{SESSION} qualifier used, the specified isolation level -persists for the entire session. If the @code{GLOBAL} qualifier is used, -the specified isolation level is applied to all new connections from -this point forward. Note that the specified isolation level will not -change the behavior for existing connections including the connection -that exectues the @code{SET GLOBAL TRANSACTION ISOLATION LEVEL} -statement. - -@multitable @columnfractions .30 .70 -@item @sc{Isolation Level} @tab @sc{Description} - -@item -@strong{READ UNCOMMITTED} -@tab Does not obtain any locks when reading rows. This means that if a -row is locked by another process in a transaction that has a more strict -isolation level, the @code{READ UNCOMMITTED} query will not wait until -the locks are released before reading the row. You will get an error if -attempt any updates while running at this isolation level. - -@item -@strong{READ COMMITTED} -@tab Locks the requested rows long enough to copy the row from the -database block to the client row buffer. If a @code{READ COMMITTED} -query finds that a row is locked exclusively by another process, it will -wait until either the row has been released, or the lock timeout value -has expired. - -@item -@strong{REPEATABLE READ} -@tab Locks all the rows needed to satisfy the query. These locks are -held until the transaction ends (commits or rolls back). If a -@code{REPEATABLE READ} query finds that a row is locked exclusively by -another process, it will wait until either the row has been released, or -the lock timeout value has expired. - -@item -@strong{SERIALIZABLE} -@tab Locks the table that contains the rows needed to satisfy the -query. This lock is held until the transaction ends (commits or rolls -back). If a @code{SERIALIZABLE} query finds that a row is exclusively -locked by another process, it will wait until either the row has been -released, or the lock timeout value has expired. -@end multitable - -The statements that get exclusive locks are @code{INSERT}, -@code{UPDATE}, @code{DELETE} and @code{SELECT ... FOR UPDATE}. Select -statements without the @code{FOR UPDATE} qualifier get shared locks -which allow other not ''for update'' select statements to read the same -rows but block anyone trying to update the row from accessing it. 
Rows -or tables with exclusive locks block all access to the row from other -transactions until the transaction ends. - -In general terms, the higher the Isolation level the more likelihood of -having concurrent locks and therefore lock conflicts. In such cases, -adjust the @code{-O gemini_lock_table_size} accordingly. - -@cindex GEMINI tables, row-level locking -@noindent -@strong{Row-Level Locking} - -@code{GEMINI} uses row locks, which allows high concurrency for requests -on the same table. - -In order to avoid lock table overflow, SQL statements that require -applying locks to a large number of rows should either be run at the -serializable isolation level or should be covered by a lock table -statement. - -Memory must be pre-allocated for the lock table. The mysqld server -startup option @code{-0 gemini_lock_table_size} can be used to adjust -the number of concurrent locks. - -@cindex GEMINI tables, limitations -@node GEMINI Limitations, , GEMINI Concepts, GEMINI Overview -@subsubsection GEMINI Limitations - -The following limitations are in effect for the current version of -@code{GEMINI}: - -@itemize @bullet -@item -@code{DROP DATABASE} does not work with @code{GEMINI} tables; instead, -drop all the tables in the database first, then drop the database. - -@item -Maximum number of @code{GEMINI} tables is 1012. - -@item -Maximum number of @code{GEMINI} files a server can manage is 1012. Each -table consumes one file; an additional file is consumed if the table has -any indexes defined on it. - -@item -Maximum size of BLOBs is 16MB. - -@item -@code{FULLTEXT} indexes are not supported with @code{GEMINI} tables. - -@item -There is no support for multi-component @code{AUTO_INCREMENT} fields -that provide alternating values at the component level. If you try to -create such a field, @code{GEMINI} will refuse. - -@item -@code{TEMPORARY TABLES} are not supported by @code{GEMINI}. The -statement @code{CREATE TEMPORARY TABLE ... TYPE=GEMINI} will generate -the response: @code{ERROR 1005: Can't create table '/tmp/#sqlxxxxx' -(errno: 0)}. - -@item -@code{FLUSH TABLES} has not been implemented with @code{GEMINI} tables. -@end itemize - -@cindex GEMINI tables, using -@node Using GEMINI Tables, , GEMINI Overview, GEMINI -@subsection Using GEMINI Tables - -This section explains the various startup options you can use with -@code{GEMINI} tables, how to backup @code{GEMINI} tables, some -performance considerations and sample configurations, and a brief -discussion of when to use @code{GEMINI} tables. - -Specifically, the topics covered in this section are: - -@itemize @bullet -@item -Startup Options -@item -Creating @code{GEMINI} Tables -@item -Backing Up @code{GEMINI} Tables -@item -Using Auto_Increment Columns With @code{GEMINI} Tables -@item -Performance Considerations -@item -Sample Configurations -@item -When To Use @code{GEMINI} Tables -@end itemize - -@cindex GEMINI tables, startup options -@menu -* Startup Options:: -* Creating GEMINI Tables:: -* Backing Up GEMINI Tables:: -* Restoring GEMINI Tables:: -* Using Auto_Increment Columns With GEMINI Tables:: -* Performance Considerations:: -* Sample Configurations:: -* When To Use GEMINI Tables:: -@end menu - -@node Startup Options, Creating GEMINI Tables, Using GEMINI Tables, Using GEMINI Tables -@subsubsection Startup Options - -The table below lists options to mysqld that can be used to change the -behavior of @code{GEMINI} tables. 
- -@multitable @columnfractions .40 .60 -@item @sc{Option} @tab @sc{Description} - -@item -@code{--default-table-type=gemini} -@tab Sets the default table handler to be @code{GEMINI}. All create -table statements will create @code{GEMINI} tables unless otherwise -specified with @code{TYPE=@var{table-type}}. As noted above, there is -currently a limitation with @code{TEMPORARY} tables using @code{GEMINI}. - -@item -@code{--gemini-flush-log-at-commit} -@tab Forces the recovery log buffers to be flushed after every -commit. This can have a serious performance penalty, so use with -caution. - -@item -@code{--gemini-recovery=FULL | NONE | FORCE} -@tab Sets the recovery mode. Default is @code{FULL}. @code{NONE} is -useful for performing repeatable batch operations because the updates -are not recorded in the recovery log. @code{FORCE} skips crash recovery -upon startup; this corrupts the database, and should be used in -emergencies only. - -@item -@code{--gemini-unbuffered-io} -@tab All database writes bypass the OS cache. This can provide a -performance boost on heavily updated systems where most of the dataset -being worked on is cached in memory with the @code{gemini_buffer_cache} -parameter. - -@item -@code{--O gemini_buffer_cache=size} -@tab Amount of memory to allocate for database buffers, including Index -and Record information. It is recommended that this number be 10% of the -total size of all @code{GEMINI} tables. Do not exceed amount of memory -on the system! - -@item -@code{--O gemini_connection_limit=#} -@tab Maximum number of connections to @code{GEMINI}; default is -@code{100}. Each connection consumes about 1K of memory. - -@item -@code{--O gemini_io_threads=#} -@tab Number of background I/O threads; default is @code{2}. Increase the -number when using @code{--gemini-unbuffered-io} - -@item -@code{--O gemini_lock_table_size=#} -@tab Sets the maximum number of concurrent locks; default is 4096. Using -@code{SET [ GLOBAL | SESSION ] TRANSACTION ISOLATION = ...} will -determine how long a program will hold row locks. - -@item -@code{--O gemini_lock_wait_timeout=seconds} -@tab Number of seconds to wait for record locks when performing queries; -default is 10 seconds. Using @code{SET [ GLOBAL | SESSION ] TRANSACTION -ISOLATION = ...} will determine how long a program will hold row locks. - -@item -@code{--skip-gemini} -@tab Do not use @code{GEMINI}. If you use @code{--skip-gemini}, @strong{MySQL} -will not initialize the @code{GEMINI} table handler, saving memory; you -cannot use @code{GEMINI} tables if you use @code{--skip-gemini}. - -@item -@code{--transaction-isolation=READ-UNCOMMITTED | READ-COMMITTED | REPEATABLE-READ | SERIALIZABLE} -@tab Sets the GLOBAL transaction isolation level for all users that -connect to the server; can be overridden with the SET ISOLATION LEVEL -statement. -@end multitable - -@cindex GEMINI tables, creating -@node Creating GEMINI Tables, Backing Up GEMINI Tables, Startup Options, Using GEMINI Tables -@subsubsection Creating GEMINI Tables - -@code{GEMINI} tables can be created by either using the @code{CREATE -TABLE} syntax or the @code{ALTER TABLE} syntax. - -@itemize @bullet -@item -The syntax for creating a @code{GEMINI} table is: - -@example -CREATE TABLE @var{table-name} (....) TYPE=GEMINI; -@end example - -@item -The syntax to convert a table to @code{GEMINI} is: - -@example -ALTER TABLE @var{table-name} TYPE=GEMINI; -@end example -@end itemize - -@xref{Tutorial}, for more information on how to create and use -@code{MySQL} tables. 
- -@cindex GEMINI tables, backing up -@node Backing Up GEMINI Tables, Restoring GEMINI Tables, Creating GEMINI Tables, Using GEMINI Tables -@subsubsection Backing Up GEMINI Tables - -@code{GEMINI} supports both @code{BACKUP TABLE} and @code{RESTORE TABLE} -syntax. To learn more about how to use @code{BACKUP} and @code{RESTORE}, -see @ref{BACKUP TABLE} and @ref{RESTORE TABLE}. - -To backup @code{GEMINI} tables outside of the @code{MySQL} environment, -you must first shut down the @code{MySQL} server. Once the server is -shut down, you can copy the files associated with @code{GEMINI} to a -different location. The files that make up the @code{GEMINI} table -handler are: - -@itemize @bullet -@item -All files associated with a table with a @code{.gmd} extention below the -@code{$DATADIR} directory. Such files include @code{@var{table}.gmd}, -@code{@var{table}.gmi}, and @code{@var{table}.frm} -@item -@code{gemini.db} in the @code{$DATADIR} directory -@item -@code{gemini.rl} in the @code{$DATADIR} directory -@item -@code{gemini.lg} in the @code{$DATADIR} directory -@end itemize - -All the @code{GEMINI} files must be copied together. You can not copy -just the @code{.gmi} and @code{.gmd} files to a different -@code{$DATADIR} and have them become part of a new database. You can -copy an entire @code{$DATADIR} directory to another location and start a -@strong{MySQL} server using the new @code{$DATADIR}. - -@cindex GEMINI tables, restoring -@node Restoring GEMINI Tables, Using Auto_Increment Columns With GEMINI Tables, Backing Up GEMINI Tables, Using GEMINI Tables -@subsubsection Restoring GEMINI Tables - -To restore @code{GEMINI} tables outside of the @code{MySQL} environment, -you must first shut down the @code{MySQL} server. Once the server is -shut down, you can remove all @code{GEMINI} files in the target -@code{$DATADIR} and then copy the files previously backed up into the -@code{$DATADIR} directory. - -As mentioned above, the files that make up the @code{GEMINI} table -handler are: - -@itemize @bullet -@item -All files associated with a table with a @code{.gmd} extention below the -@code{$DATADIR} directory. Such files include @code{@var{table}.gmd}, -@code{@var{table}.gmi}, and @code{@var{table}.frm} -@item -@code{gemini.db} in the @code{$DATADIR} directory -@item -@code{gemini.rl} in the @code{$DATADIR} directory -@item -@code{gemini.lg} in the @code{$DATADIR} directory -@end itemize - -When restoring a table, all the @code{GEMINI} files must be copied -together. You can not restore just the @code{.gmi} and @code{.gmd} -files. - -@cindex GEMINI tables, auto_increment -@node Using Auto_Increment Columns With GEMINI Tables, Performance Considerations, Restoring GEMINI Tables, Using GEMINI Tables -@subsubsection Using Auto_Increment Columns With GEMINI Tables - -As mentioned previously, @code{GEMINI} tables support row-level and -table-level locking to increase concurrency in applications and to allow -reading of tables without locking for maximum concurrency in heavy -update environments. This feature has several implications when working -with @code{auto_increment} tables. - -In @code{MySQL}, when a column is defined as an @code{auto_increment} -column, and a row is inserted into the table with a @code{NULL} for the -column, the @code{auto_increment} column is updated to be 1 higher than -the highest value in the column. - -With @code{MyISAM} tables, the @code{auto_increment} function is -implemented by looking in the index and finding the highest value and -adding 1 to it. 
This is possible because the entire @code{ISAM} table is -locked during the update period and the increment value is therefore -guaranteed to not be changing. - -With @code{GEMINI} tables, the @code{auto_increment} function is -implemented by maintaining a counter in a separate location from the -table data. Instead of looking at the highest value in the table index, -@code{GEMINI} tables look at this separately maintained counter. This -means that in a transactional model, unlike the bottleneck inherent in -the @code{MyISAM} approach, @code{GEMINI} users do @b{not} have to wait -until the transaction that added the last value either commits or -rollbacks before looking at the value. - -Two side-effects of the @code{GEMINI} implementation are: - -@itemize @bullet -@item -If an insert is done where the column with the @code{auto_increment} is -specified, and this specified value is the highest value, @code{MyISAM} -uses it as its @code{auto_increment} value, and every subsequent insert -is based on this. By contrast, @code{GEMINI} does not use this value, -but instead uses the value maintained in the separate @code{GEMINI} -counter location. - -@item -To set the counter to a specific value, you can use @code{SET -insert_id=#} and insert a new row in the table. However, as a general -rule, values should not be inserted into an @code{auto_increment} -column; the database manager should be maintaining this field, not the -application. @code{SET insert_id} is a recovery mechanism that should be -used in case of error only. -@end itemize - -Note that if you delete the row containing the maximum value for an -@code{auto_increment} column, the value will be reused with a -@code{GEMINI} table but not with a @code{MyISAM} table. - -See @ref{CREATE TABLE} for more information about creating -@code{auto_increment} columns. - -@cindex GEMINI tables, peformance considerations -@node Performance Considerations, Sample Configurations, Using Auto_Increment Columns With GEMINI Tables, Using GEMINI Tables -@subsubsection Performance Considerations - -In addition to designing the best possible application, configuration of -the data and the server startup parameters need to be considered. How -the hardware is being used can have a dramatic affect on how fast the -system will respond to queries. Disk Drives and Memory must both be -considered. - -@noindent -@strong{Disk Drives} - -For best performance, you want to spread the data out over as many disks -as possible. Using RAID 10 stripes work very well. If there are a lot of -updates then the recovery log (@code{gemini.rl}) should be on a -relatively quiet disk drive. - -To spread the data out without using RAID 10, you can do the following: - -@itemize @bullet -@item -Group all the tables into three categories: Heavy Use, Moderate Use, -Light Use. - -@item -Take the number of disk drives available and use a round-robin approach -to the three categories grouping the tables on a disk drive. The result -will be an equal distribution of Heavy/Moderate/Light tables assigned to -each disk drive. - -@item -Once the tables have been converted to @code{GEMINI} by using the -@code{ALTER TABLE <name> TYPE=GEMINI} statements, move (@code{mv}) the -@code{.gmd} and @code{.gmi} files to a different disk drive and link -(@code{ln -s}) them back to the original directory where the @code{.frm} -file resides. - -@item -Finally, move the @code{gemini.rl} file to its quiet disk location and link -the file back to the @code{$DATADIR} directory. 
-@end itemize - -@noindent -@strong{Memory} - -The more data that can be placed in memory the faster the access to the -data. Figure out how large the @code{GEMINI} data is by adding up the -@code{.gmd} and @code{.gmi} file sizes. If you can, put at least 10% of -the data into memory. You allocate memory for the rows and indexes by -using the @code{gemini_buffer_cache} startup parameter. For example: - -@example -mysqld -O gemini_buffer_cache=800M -@end example - -@noindent -would allocate 800 MB of memory for the @code{GEMINI} buffer cache. - -@cindex GEMINI tables, sample configurations -@node Sample Configurations, When To Use GEMINI Tables, Performance Considerations, Using GEMINI Tables -@subsubsection Sample Configurations - -Based on the performance considerations above, we can look at some -examples for how to get the best performance out of the system when -using @code{GEMINI} tables. - -@multitable @columnfractions .30 .70 -@item @sc{Hardware} @tab @sc{Configuration} -@item -One CPU, 128MB memory, one disk drive -@tab Allocate 80MB of memory for reading and updating @code{GEMINI} -tables by starting the mysqld server with the following option: - -@example --O gemini_buffer_cache=80M -@end example - -@item -Two CPUs, 512MB memory, four disk drives -@tab Use RAID 10 to stripe the data across all available disks, or use -the method described in the performance considerations section, -above. Allocate 450MB of memory for reading/updating @code{GEMINI} -tables: - -@example --O gemini_buffer_cache=450M -@end example -@end multitable - -@cindex GEMINI tables, when to use -@node When To Use GEMINI Tables, , Sample Configurations, Using GEMINI Tables -@subsubsection When To Use GEMINI Tables - -Because the @code{GEMINI} table handler provides crash recovery and -transaction support, there is extra overhead that is not found in other -non-transaction safe table handlers. Here are some general guidelines -for when to employ @code{GEMINI} and when to use other non-transaction -safe tables (@code{NTST}). - -@multitable @columnfractions .30 .25 .45 -@item -@sc{Access Trends} @tab @sc{Table Type} @tab @sc{Reason} -@item -Read-only -@tab @code{NTST} -@tab Less overhead and faster -@item -Critical data -@tab @code{GEMINI} -@tab Crash recovery protection -@item -High concurrency -@tab @code{GEMINI} -@tab Row-level locking -@item -Heavy update -@tab @code{GEMINI} -@tab Row-level locking -@end multitable - -The table below shows how a typical application schema could be defined. - -@multitable @columnfractions .15 .30 .25 .30 -@item -@sc{Table} @tab @sc{Contents} @tab @sc{Table Type} @tab @sc{Reason} -@item -account -@tab Customer account data -@tab @code{GEMINI} -@tab Critical data, heavy update -@item -order -@tab Orders for a customer -@tab @code{GEMINI} -@tab Critical data, heavy update -@item -orderline -@tab Orderline detail for an order -@tab @code{GEMINI} -@tab Critical data, heavy update -@item -invdesc -@tab Inventory description -@tab @code{NTST} -@tab Read-only, frequent access -@item -salesrep -@tab Sales rep information -@tab @code{NTST} -@tab Infrequent update -@item -inventory -@tab Inventory information -@tab @code{GEMINI} -@tab High concurrency, critical data -@item -config -@tab System configuration -@tab @code{NTST} -@tab Read-only -@end multitable - @node InnoDB, , GEMINI, Table types @section InnoDB Tables @@ -25600,6 +24546,11 @@ InnoDB has been designed for maximum performance when processing large data volumes. 
Its CPU efficiency is probably not matched by any other
 disk-based relational database engine.
 
+You can find the latest information about InnoDB at
+@uref{http://www.innodb.com}. The most up-to-date version of the
+InnoDB manual is always available there, and you can also order
+commercial support for InnoDB.
+
 Technically, InnoDB is a database backend placed under @strong{MySQL}. InnoDB
 has its own buffer pool for caching data and indexes in main
 memory. InnoDB stores its tables and indexes in a tablespace, which
@@ -25750,7 +24701,7 @@ than InnoDB in the same transaction, then a deadlock may arise which
 InnoDB cannot notice. In cases like this the timeout is useful to
 resolve the situation.
 @item @code{innodb_flush_method} @tab
-(Available from 3.23.39 up.)
+(Available from 3.23.40 up.)
 
 The default value for this is @code{fdatasync}. Another option is
 @code{O_DSYNC}. Options @code{littlesync} and @code{nosync} have the
@@ -26828,11 +25779,11 @@ the maximum size for a table. The minimum tablespace size is 10 MB.
 
 @node InnoDB contact information, , InnoDB restrictions, InnoDB
 @subsection InnoDB contact information
 
-Contact information of Innobase Oy, producer of the InnoDB engine:
+Contact information of Innobase Oy, producer of the InnoDB engine.
+Website: @uref{http://www.innodb.com}. Email:
+@email{Heikki.Tuuri@@innodb.com}
 
 @example
-Website: www.innodb.com
-Heikki.Tuuri@@innodb.com
 phone: 358-9-6969 3250 (office) 358-40-5617367 (mobile)
 InnoDB Oy Inc.
 World Trade Center Helsinki
@@ -26842,6 +25793,1142 @@ P.O.Box 800
 Finland
 @end example
 
+@cindex tables, @code{BDB}
+@cindex tables, @code{Berkeley DB}
+@node BDB, GEMINI, HEAP, Table types
+@section BDB or Berkeley_DB Tables
+
+@menu
+* BDB overview:: Overview of BDB Tables
+* BDB install:: Installing BDB
+* BDB start:: BDB startup options
+* BDB characteristic:: Some characteristics of @code{BDB} tables
+* BDB TODO:: Some things we need to fix for BDB in the near future
+* BDB portability:: Operating systems supported by @strong{BDB}
+* BDB errors:: Errors You May Get When Using BDB Tables
+@end menu
+
+@node BDB overview, BDB install, BDB, BDB
+@subsection Overview of BDB Tables
+
+Support for BDB tables is included in the @strong{MySQL} source distribution
+starting from Version 3.23.34 and is activated in the @strong{MySQL}-Max
+binary.
+
+BerkeleyDB, available at @uref{http://www.sleepycat.com/}, has provided
+@strong{MySQL} with a transactional table handler. By using BerkeleyDB
+tables, your tables may have a greater chance of surviving crashes, and
+you also get @code{COMMIT} and @code{ROLLBACK} on transactions. The
+@strong{MySQL} source distribution comes with a BDB distribution that has a
+couple of small patches to make it work more smoothly with @strong{MySQL}.
+You can't use a non-patched @code{BDB} version with @strong{MySQL}.
+
+We at @strong{MySQL AB} are working in close cooperation with Sleepycat to
+keep the quality of the @strong{MySQL}/BDB interface high.
+
+When it comes to supporting BDB tables, we are committed to helping our
+users locate the problem and to creating a reproducible test case for
+any problems involving BDB tables. Any such test case will be forwarded
+to Sleepycat, who in turn will help us find and fix the problem. As this
+is a two-stage operation, any problems with BDB tables may take a little
+longer for us to fix than for other table handlers. However, as the
+BerkeleyDB code itself has been used by many applications other than
+@strong{MySQL}, we don't envision any big problems with this.
+@xref{Table handler support}.
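+
+As a minimal illustration of transactions on a @code{BDB} table (the
+table name @code{t1} is only an example):
+
+@example
+mysql> CREATE TABLE t1 (a INT NOT NULL, PRIMARY KEY (a)) TYPE=BDB;
+mysql> SET AUTOCOMMIT=0;
+mysql> INSERT INTO t1 VALUES (1);
+mysql> ROLLBACK;
+@end example
+
+Here the @code{ROLLBACK} undoes the insert, something that
+non-transaction-safe table handlers cannot do.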
+
+@node BDB install, BDB start, BDB overview, BDB
+@subsection Installing BDB
+
+If you have downloaded a binary version of @strong{MySQL} that includes
+support for BerkeleyDB, simply follow the instructions for installing a
+binary version of @strong{MySQL}.
+@xref{Installing binary}. @xref{mysqld-max, , @code{mysqld-max}}.
+
+To compile @strong{MySQL} with Berkeley DB support, download @strong{MySQL}
+Version 3.23.34 or newer and configure @code{MySQL} with the
+@code{--with-berkeley-db} option. @xref{Installing source}.
+
+@example
+cd /path/to/source/of/mysql-3.23.34
+./configure --with-berkeley-db
+@end example
+
+Please refer to the manual provided with the @code{BDB} distribution for
+more up-to-date information.
+
+Even though Berkeley DB is in itself well tested and reliable, the
+@strong{MySQL} interface is still considered beta quality. We are
+actively improving and optimizing it to make it stable soon.
+
+@node BDB start, BDB characteristic, BDB install, BDB
+@subsection BDB startup options
+
+If you are running with @code{AUTOCOMMIT=0}, your changes in @code{BDB}
+tables will not be committed until you execute @code{COMMIT}. Instead of
+committing, you can execute @code{ROLLBACK} to forget your changes.
+@xref{COMMIT}.
+
+If you are running with @code{AUTOCOMMIT=1} (the default), your changes
+will be committed immediately. You can start an extended transaction with
+the @code{BEGIN WORK} SQL command, after which your changes will not be
+committed until you execute @code{COMMIT} (or decide to @code{ROLLBACK}
+the changes).
+
+The following options to @code{mysqld} can be used to change the behavior of
+BDB tables:
+
+@multitable @columnfractions .30 .70
+@item @strong{Option} @tab @strong{Meaning}
+@item @code{--bdb-home=directory} @tab Base directory for BDB tables. This should be the same directory you use for @code{--datadir}.
+@item @code{--bdb-lock-detect=#} @tab Berkeley lock detect. One of (DEFAULT, OLDEST, RANDOM, or YOUNGEST).
+@item @code{--bdb-logdir=directory} @tab Berkeley DB log file directory.
+@item @code{--bdb-no-sync} @tab Don't synchronously flush logs.
+@item @code{--bdb-no-recover} @tab Don't start Berkeley DB in recover mode.
+@item @code{--bdb-shared-data} @tab Start Berkeley DB in multi-process mode (don't use @code{DB_PRIVATE} when initializing Berkeley DB).
+@item @code{--bdb-tmpdir=directory} @tab Berkeley DB tempfile name.
+@item @code{--skip-bdb} @tab Don't use Berkeley DB.
+@item @code{-O bdb_max_lock=1000} @tab Set the maximum number of locks possible. @xref{SHOW VARIABLES}.
+@end multitable
+
+If you use @code{--skip-bdb}, @strong{MySQL} will not initialize the
+Berkeley DB library, and this will save a lot of memory. Of course,
+you cannot use @code{BDB} tables if you are using this option.
+
+Normally you should start @code{mysqld} without @code{--bdb-no-recover} if you
+intend to use BDB tables. This may, however, give you problems when you
+try to start @code{mysqld} if the BDB log files are corrupted. @xref{Starting
+server}.
+
+With @code{bdb_max_lock} you can specify the maximum number of locks
+(10000 by default) you can have active on a BDB table. You should
+increase this if you get errors of type @code{bdb: Lock table is out of
+available locks} or @code{Got error 12 from ...} when you have long
+transactions or when @code{mysqld} has to examine many rows to
+calculate the query.
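+
+For example, to raise the limit at server startup (the value 60000 is
+only an illustration):
+
+@example
+shell> mysqld -O bdb_max_lock=60000
+@end example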
+
+You may also want to change @code{binlog_cache_size} and
+@code{max_binlog_cache_size} if you are using big multi-line transactions.
+@xref{COMMIT}.
+
+@node BDB characteristic, BDB TODO, BDB start, BDB
+@subsection Some characteristics of @code{BDB} tables
+
+@itemize @bullet
+@item
+To be able to roll back transactions, BDB maintains log files. For
+maximum performance you should place these on a different disk from
+your databases by using the @code{--bdb-logdir} option.
+@item
+@strong{MySQL} performs a checkpoint each time a new BDB log
+file is started, and removes any log files that are not needed for
+current transactions. One can also run @code{FLUSH LOGS} at any time
+to checkpoint the Berkeley DB tables.
+
+For disaster recovery, one should use table backups plus
+@strong{MySQL}'s binary log. @xref{Backup}.
+
+@strong{Warning}: If you delete old log files that are in use, BDB will
+not be able to do recovery at all and you may lose data if something
+goes wrong.
+@item
+@strong{MySQL} requires a @code{PRIMARY KEY} in each BDB table to be
+able to refer to previously read rows. If you don't create one,
+@strong{MySQL} will create and maintain a hidden @code{PRIMARY KEY} for
+you. The hidden key has a length of 5 bytes and is incremented for each
+insert attempt.
+@item
+If all columns you access in a @code{BDB} table are part of the same index or
+part of the primary key, then @strong{MySQL} can execute the query
+without having to access the actual row. In a @code{MyISAM} table the
+above holds only if the columns are part of the same index.
+@item
+The @code{PRIMARY KEY} will be faster than any other key, as the
+@code{PRIMARY KEY} is stored together with the row data. As the other keys are
+stored as the key data + the @code{PRIMARY KEY}, it's important to keep the
+@code{PRIMARY KEY} as short as possible to save disk space and get better
+speed.
+@item
+@code{LOCK TABLES} works on @code{BDB} tables as with other tables. If
+you don't use @code{LOCK TABLE}, @strong{MySQL} will issue an internal
+multiple-write lock on the table to ensure that the table will be
+properly locked if another thread issues a table lock.
+@item
+Internal locking in @code{BDB} tables is done at the page level.
+@item
+@code{SELECT COUNT(*) FROM table_name} is slow because @code{BDB} tables
+don't maintain a count of the number of rows in the table.
+@item
+Scanning is slower than with @code{MyISAM} tables because the data in
+BDB tables is stored in B-trees and not in a separate data file.
+@item
+The application must always be prepared to handle cases where any
+change of a @code{BDB} table may cause an automatic rollback and any
+read may fail with a deadlock error.
+@item
+Keys are not compressed relative to previous keys as in ISAM or MyISAM
+tables. In other words, the key information will take a little more
+space in @code{BDB} tables compared to MyISAM tables that don't use
+@code{PACK_KEYS=0}.
+@item
+There are often holes in a BDB table to allow you to insert new rows in
+the middle of the key tree. This makes BDB tables somewhat larger than
+MyISAM tables.
+@item
+The optimizer needs to know an approximation of the number of rows in
+the table. @strong{MySQL} solves this by counting inserts and
+maintaining this in a separate segment in each BDB table. If you don't
+do a lot of @code{DELETE}s or @code{ROLLBACK}s, this number should be
+accurate enough for the @strong{MySQL} optimizer, but as @strong{MySQL}
+only stores the number on close, it may be wrong if @strong{MySQL} dies
+unexpectedly. It should not be fatal even if this number is not 100%
+correct. You can update the number of rows by executing @code{ANALYZE
+TABLE} or @code{OPTIMIZE TABLE}. @xref{ANALYZE TABLE}. @xref{OPTIMIZE
+TABLE}.
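+
+For example (@code{t1} stands for any @code{BDB} table here):
+
+@example
+mysql> ANALYZE TABLE t1;
+@end example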
+@item
+If the disk becomes full with a @code{BDB} table, you will get an error
+(probably error 28) and the transaction should roll back. This is in
+contrast to @code{MyISAM} and @code{ISAM} tables, where @code{mysqld}
+will wait for enough free disk space before continuing.
+@end itemize
+
+@node BDB TODO, BDB portability, BDB characteristic, BDB
+@subsection Some things we need to fix for BDB in the near future
+
+@itemize @bullet
+@item
+It's very slow to open many BDB tables at the same time. If you are
+going to use BDB tables, you should not have a very big table cache (>
+256 ?) and you should use @code{--no-auto-rehash} with the @code{mysql}
+client. We plan to partly fix this in 4.0.
+@item
+@code{SHOW TABLE STATUS} doesn't yet provide that much information for BDB
+tables.
+@item
+Optimize performance.
+@item
+Change to not use page locks at all when we are scanning tables.
+@end itemize
+
+@node BDB portability, BDB errors, BDB TODO, BDB
+@subsection Operating systems supported by @strong{BDB}
+
+If, after having built @strong{MySQL} with support for BDB tables, you get
+the following error in the log file when you start @code{mysqld}:
+
+@example
+bdb: architecture lacks fast mutexes: applications cannot be threaded
+Can't init databases
+@end example
+
+this means that @code{BDB} tables are not supported for your architecture.
+In this case you have to rebuild @strong{MySQL} without BDB table support.
+
+Note: The following list is not complete; we will update it as we get
+more information.
+
+Currently we know that BDB tables work with the following operating
+systems:
+
+@itemize @bullet
+@item
+Linux 2.x Intel
+@item
+Solaris SPARC
+@item
+SCO OpenServer
+@item
+SCO UnixWare 7.0.1
+@end itemize
+
+They do not work with the following operating systems:
+
+@itemize @bullet
+@item
+Linux 2.x Alpha
+@item
+Mac OS X
+@end itemize
+
+@node BDB errors, , BDB portability, BDB
+@subsection Errors You May Get When Using BDB Tables
+
+@itemize @bullet
+@item
+If you get the following error in the @code{hostname.err} log when
+starting @code{mysqld}:
+
+@example
+bdb: Ignoring log file: .../log.XXXXXXXXXX: unsupported log version #
+@end example
+it means that the new @code{BDB} version doesn't support the old log
+file format. In this case you have to delete all @code{BDB} log files
+from your database directory (the files that have names of the form
+@code{log.XXXXXXXXXX}) and restart @code{mysqld}. We also recommend
+that you do a @code{mysqldump --opt} of your old @code{BDB} tables,
+delete the old tables, and restore the dump.
+@item
+If you are not running in @code{auto_commit} mode and delete a table
+that is in use by another thread, you may get the following error
+messages in the @strong{MySQL} error file:
+
+@example
+001119 23:43:56  bdb:  Missing log fileid entry
+001119 23:43:56  bdb:  txn_abort: Log undo failed for LSN: 1 3644744: Invalid
+@end example
+
+This is not fatal, but we don't recommend that you delete tables when
+not in @code{auto_commit} mode until this problem is fixed (the fix is
+not trivial).
+
+@end itemize
+
+@cindex GEMINI tables
+@node GEMINI, InnoDB, BDB, Table types
+@section GEMINI Tables
+
+@cindex GEMINI tables, overview
+@menu
+* GEMINI Overview::
+* Using GEMINI Tables::
+@end menu
+
+@node GEMINI Overview, Using GEMINI Tables, GEMINI, GEMINI
+@subsection GEMINI Overview
+
+@code{GEMINI} is currently not included in the @strong{MySQL} 3.23
+distribution because, to our knowledge, it is not an open source (GPL)
+product.
+
+@code{GEMINI} is a transaction-safe table handler for @strong{MySQL}. It
+provides row-level locking, robust transaction support, and reliable
+crash recovery. It is targeted for databases that need to handle heavy
+multi-user updates typical of transaction processing applications while
+still providing excellent performance for read-intensive operations. The
+@code{GEMINI} table type is developed and supported by NuSphere
+Corporation (see @url{http://www.nusphere.com}).
+
+@code{GEMINI} provides full ACID transaction properties (Atomic,
+Consistent, Isolated, and Durable) with a programming model that
+includes support for statement atomicity and all four standard isolation
+levels (Read Uncommitted, Read Committed, Repeatable Read, and
+Serializable) defined in the SQL standard.
+
+The @code{GEMINI} tables support row-level and table-level locking to
+increase concurrency in applications and allow reading of tables without
+locking for maximum concurrency in a heavy update environment. The
+transaction, locking, and recovery mechanisms are tightly integrated to
+eliminate unnecessary administration overhead.
+
+In general, if @code{GEMINI} tables are selected for an application, it
+is recommended that all tables updated in the application be
+@code{GEMINI} tables to provide well-defined system behavior. If
+non-@code{GEMINI} tables are mixed into the application, then ACID
+transaction properties cannot be maintained. While there are clearly
+cases where mixing table types is appropriate, it should always be done
+with careful consideration of the impact on transaction consistency and
+recoverability needs of the application and underlying database.
+
+The @code{GEMINI} table type is derived from a successful commercial
+database and uses the storage kernel technology tightly integrated with
+the @strong{MySQL} server. The basic @code{GEMINI} technology is in use by
+millions of users worldwide in production environments today. This
+maturity allows @code{GEMINI} tables to provide a solution for those
+users who require transaction-based behavior as part of their
+applications.
+
+The @code{GEMINI} table handler supports a configurable data cache that
+allows a significant portion of any database to be maintained in memory
+while still allowing durable updates.
+
+@cindex GEMINI tables, features
+@menu
+* GEMINI Features::
+* GEMINI Concepts::
+* GEMINI Limitations::
+@end menu
+
+@node GEMINI Features, GEMINI Concepts, GEMINI Overview, GEMINI Overview
+@subsubsection GEMINI Features
+
+The following summarizes the major features provided by @code{GEMINI}
+tables.
+
+@itemize @bullet
+@item
+Supports all optimization statistics used by the @strong{MySQL} optimizer,
+including table cardinality, index range estimates, and multi-component
+selectivity, to ensure optimal query performance.
+
+@item
+Maintains exact cardinality information for each table, so @code{SELECT
+COUNT(*) FROM @var{table-name}} always returns an answer immediately.
+
+@item
+Supports index-only queries; when index data is sufficient to resolve a
+query, no record data is read (for non-character types).
+
+@item
+@code{GEMINI} uses block-based I/O for better performance. There is no
+performance penalty for using @code{VARCHAR} fields. The maximum record size is
+currently 32K.
+
+@item
+The number of rows in a single @code{GEMINI} table can be 4 quintillion
+(full use of 64 bits).
+
+@item
+Individual tables can be as large as 16 petabytes.
+
+@item
+Locking is done at a record or row level rather than at table level
+unless table locks are explicitly requested. When a row is inserted into
+a table, other rows can be updated, inserted, or deleted without waiting
+for the inserted row to be committed.
+
+@item
+Provides durable transactions backed by a crash recovery mechanism that
+returns the database to a known consistent state in the event of an
+unexpected failure.
+
+@item
+Support for all isolation levels and statement atomicity defined in the
+SQL standard.
+
+@item
+Reliable Master Replication; the master database can survive system
+failure and recover all committed transactions.
+@end itemize
+
+@cindex GEMINI tables, concepts
+@node GEMINI Concepts, GEMINI Limitations, GEMINI Features, GEMINI Overview
+@subsubsection GEMINI Concepts
+
+This section highlights some of the important concepts behind
+@code{GEMINI} and the @code{GEMINI} programming model, including:
+
+@itemize @bullet
+@item
+ACID Transactions
+@item
+Transaction COMMIT/ROLLBACK
+@item
+Statement Atomicity
+@item
+Recovery
+@item
+Isolation Levels
+@item
+Row-Level Locking
+@end itemize
+
+These features are described below.
+
+@cindex GEMINI tables, ACID transactions
+@noindent
+@strong{ACID Transactions}
+
+ACID in the context of transactions is an acronym which stands for
+@emph{Atomicity}, @emph{Consistency}, @emph{Isolation}, @emph{Durability}.
+
+@multitable @columnfractions .25 .75
+@item @sc{Attribute} @tab @sc{Description}
+@item
+@strong{Atomicity}
+@tab A transaction allows for the grouping of one or more changes to
+tables and rows in the database to form an atomic or indivisible
+operation. That is, either all of the changes occur or none of them
+do. If for any reason the transaction cannot be completed, everything
+this transaction changed can be restored to the state it was in prior to
+the start of the transaction via a rollback operation.
+
+@item
+@strong{Consistency}
+@tab
+Transactions always operate on a consistent view of the data and when
+they end always leave the data in a consistent state. Data may be said to
+be consistent as long as it conforms to a set of invariants, such as no
+two rows in the customer table have the same customer ID and all orders
+have an associated customer row. While a transaction executes, these
+invariants may be violated, but no other transaction will be allowed to
+see these inconsistencies, and all such inconsistencies will have been
+eliminated by the time the transaction ends.
+
+@item
+@strong{Isolation}
+@tab To a given transaction, it should appear as though it is running
+all by itself on the database. The effects of concurrently running
+transactions are invisible to this transaction, and the effects of this
+transaction are invisible to others until the transaction is committed.
+
+@item
+@strong{Durability}
+@tab Once a transaction is committed, its effects are guaranteed to
+persist even in the event of subsequent system failures.
+Until the transaction commits, not only are any changes made by that
+transaction not durable, but they are guaranteed not to persist in the
+face of a system failure, as crash recovery will roll back their effects.
+@end multitable
+
+@cindex GEMINI tables, COMMIT/ROLLBACK
+@noindent
+@strong{Transaction COMMIT/ROLLBACK}
+
+As stated above, a transaction is a group of work being done to
+data. Unless otherwise directed, @strong{MySQL} considers each statement
+a transaction in itself. Multiple updates can be accomplished by placing
+them in a single statement; however, they are limited to a single table.
+
+Applications tend to require more robust use of transaction
+concepts. Take, for example, a system that processes an order: a row may
+be inserted in an order table, additional rows may be added to an
+order-line table, updates may be made to inventory tables, etc. It is
+important that if the order completes, all the changes are made to all
+the tables involved; likewise, if the order fails, none of the changes
+to the tables may occur. To facilitate this requirement, @strong{MySQL}
+has syntax to start a transaction called @code{BEGIN WORK}. All
+statements that occur after the @code{BEGIN WORK} statement are grouped
+into a single transaction. The end of this transaction occurs when a
+@code{COMMIT} or @code{ROLLBACK} statement is encountered. After the
+@code{COMMIT} or @code{ROLLBACK}, the system returns to the behavior it
+had before the @code{BEGIN WORK} statement was encountered, where every
+statement is a transaction.
+
+To permanently turn off the behavior where every statement is a
+transaction, @strong{MySQL} added a variable called
+@code{AUTOCOMMIT}. The @code{AUTOCOMMIT} variable can have two values,
+@code{1} and @code{0}. Every statement is a transaction when
+@code{AUTOCOMMIT} is set to @code{1} (@code{AUTOCOMMIT=1}). When
+@code{AUTOCOMMIT} is set to @code{0} (@code{AUTOCOMMIT=0}), then every
+statement is part of the same transaction until the transaction ends
+with either @code{COMMIT} or @code{ROLLBACK}. Once a transaction
+completes, a new transaction is immediately started and the process
+repeats.
+
+Here is an example of the SQL statements that you may find in a typical
+order:
+
+@example
+BEGIN WORK;
+  INSERT INTO order VALUES ...;
+  INSERT INTO order-lines VALUES ...;
+  INSERT INTO order-lines VALUES ...;
+  INSERT INTO order-lines VALUES ...;
+  UPDATE inventory WHERE ...;
+COMMIT;
+@end example
+
+This example shows how to use the @code{BEGIN WORK} statement to start a
+transaction. If the variable @code{AUTOCOMMIT} is set to @code{0}, then
+a transaction would have been started already. In this case, the
+@code{BEGIN WORK} commits the current transaction and starts a new one.
+
+@cindex GEMINI tables, statement atomicity
+@noindent
+@strong{Statement Atomicity}
+
+As mentioned above, when running with @code{AUTOCOMMIT} set to @code{1},
+each statement executes as a single transaction. When a statement has an
+error, all changes made by the statement must be undone. Transactions
+support this behavior. Non-transaction-safe table handlers would leave a
+partial statement update, where some of the changes from the statement
+would be contained in the database and other changes from the statement
+would not. Work would need to be done to manually recover from the
+error.
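+
+For example, with a transaction-safe handler such as @code{GEMINI}, a
+multiple-row statement like the following (the table and column names
+are only illustrations) either changes every qualifying row or none of
+them, even if an error occurs halfway through:
+
+@example
+UPDATE account SET balance = balance * 1.05 WHERE type = 'savings';
+@end example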
+
+@cindex GEMINI tables, recovery
+@noindent
+@strong{Recovery}
+
+Transactions are the basis for database recovery. Recovery is what
+supports the Durability attribute of the ACID transaction.
+
+@code{GEMINI} uses a separate file called the Recovery Log, located in
+the @code{$DATADIR} directory and named @code{gemini.rl}. This file
+maintains the integrity of all the @code{GEMINI} tables. @code{GEMINI}
+cannot recover any data from non-@code{GEMINI} tables. In addition, the
+@code{gemini.rl} file is used to roll back transactions in support of
+the @code{ROLLBACK} statement.
+
+In the event of a system failure, the next time the @strong{MySQL}
+server is started, @code{GEMINI} will automatically go through its
+crash recovery process. The result of crash recovery is that all the
+@code{GEMINI} tables will contain the latest changes made to them, and
+all transactions that were open at the time of the crash will have been
+rolled back.
+
+The @code{GEMINI} Recovery Log reuses space when it can. Space can be
+reused when information in the Recovery Log is no longer needed for
+crash recovery or rollback.
+
+@cindex GEMINI tables, isolation levels
+@noindent
+@strong{Isolation Levels}
+
+There are four isolation levels supported by @code{GEMINI}:
+
+@itemize @bullet
+@item
+READ UNCOMMITTED
+@item
+READ COMMITTED
+@item
+REPEATABLE READ
+@item
+SERIALIZABLE
+@end itemize
+
+These isolation levels apply only to shared locks obtained by select
+statements, excluding @code{SELECT ... FOR UPDATE}. Statements that get
+exclusive locks always retain those locks until the transaction commits
+or rolls back.
+
+By default, @code{GEMINI} operates at the @code{READ COMMITTED}
+level. You can override the default using the following command:
+
+@example
+SET [GLOBAL | SESSION] TRANSACTION ISOLATION LEVEL [READ UNCOMMITTED |
+READ COMMITTED | REPEATABLE READ | SERIALIZABLE]
+@end example
+
+If the @code{SESSION} qualifier is used, the specified isolation level
+persists for the entire session. If the @code{GLOBAL} qualifier is used,
+the specified isolation level is applied to all new connections from
+this point forward. Note that the specified isolation level will not
+change the behavior for existing connections, including the connection
+that executes the @code{SET GLOBAL TRANSACTION ISOLATION LEVEL}
+statement.
+
+@multitable @columnfractions .30 .70
+@item @sc{Isolation Level} @tab @sc{Description}
+
+@item
+@strong{READ UNCOMMITTED}
+@tab Does not obtain any locks when reading rows. This means that if a
+row is locked by another process in a transaction that has a more strict
+isolation level, the @code{READ UNCOMMITTED} query will not wait until
+the locks are released before reading the row. You will get an error if
+you attempt any updates while running at this isolation level.
+
+@item
+@strong{READ COMMITTED}
+@tab Locks the requested rows long enough to copy the row from the
+database block to the client row buffer. If a @code{READ COMMITTED}
+query finds that a row is locked exclusively by another process, it will
+wait until either the row has been released, or the lock timeout value
+has expired.
+
+@item
+@strong{REPEATABLE READ}
+@tab Locks all the rows needed to satisfy the query. These locks are
+held until the transaction ends (commits or rolls back). If a
+@code{REPEATABLE READ} query finds that a row is locked exclusively by
+another process, it will wait until either the row has been released, or
+the lock timeout value has expired.
+
+@item
+@strong{SERIALIZABLE}
+@tab Locks the table that contains the rows needed to satisfy the
+query. This lock is held until the transaction ends (commits or rolls
+back).
+If a @code{SERIALIZABLE} query finds that a row is exclusively locked
+by another process, it will wait until either the row has been
+released, or the lock timeout value has expired.
+@end multitable
+
+The statements that get exclusive locks are @code{INSERT},
+@code{UPDATE}, @code{DELETE}, and @code{SELECT ... FOR UPDATE}. Select
+statements without the @code{FOR UPDATE} qualifier get shared locks,
+which allow other non-@code{FOR UPDATE} select statements to read the
+same rows but block anyone trying to update them. Rows or tables with
+exclusive locks block all access to the row from other transactions
+until the transaction ends.
+
+In general terms, the higher the isolation level, the greater the
+likelihood of holding concurrent locks and therefore of lock conflicts.
+In such cases, adjust @code{-O gemini_lock_table_size} accordingly.
+
+@cindex GEMINI tables, row-level locking
+@noindent
+@strong{Row-Level Locking}
+
+@code{GEMINI} uses row locks, which allow high concurrency for requests
+on the same table.
+
+In order to avoid lock table overflow, SQL statements that require
+applying locks to a large number of rows should either be run at the
+serializable isolation level or should be covered by a @code{LOCK
+TABLES} statement.
+
+Memory must be pre-allocated for the lock table. The @code{mysqld}
+server startup option @code{-O gemini_lock_table_size} can be used to
+adjust the number of concurrent locks.
+
+@cindex GEMINI tables, limitations
+@node GEMINI Limitations, , GEMINI Concepts, GEMINI Overview
+@subsubsection GEMINI Limitations
+
+The following limitations are in effect for the current version of
+@code{GEMINI}:
+
+@itemize @bullet
+@item
+@code{DROP DATABASE} does not work with @code{GEMINI} tables; instead,
+drop all the tables in the database first, then drop the database.
+
+@item
+Maximum number of @code{GEMINI} tables is 1012.
+
+@item
+Maximum number of @code{GEMINI} files a server can manage is 1012. Each
+table consumes one file; an additional file is consumed if the table has
+any indexes defined on it.
+
+@item
+Maximum size of BLOBs is 16MB.
+
+@item
+@code{FULLTEXT} indexes are not supported with @code{GEMINI} tables.
+
+@item
+There is no support for multi-component @code{AUTO_INCREMENT} fields
+that provide alternating values at the component level. If you try to
+create such a field, @code{GEMINI} will refuse.
+
+@item
+@code{TEMPORARY TABLES} are not supported by @code{GEMINI}. The
+statement @code{CREATE TEMPORARY TABLE ... TYPE=GEMINI} will generate
+the response: @code{ERROR 1005: Can't create table '/tmp/#sqlxxxxx'
+(errno: 0)}.
+
+@item
+@code{FLUSH TABLES} has not been implemented with @code{GEMINI} tables.
+@end itemize
+
+@cindex GEMINI tables, using
+@node Using GEMINI Tables, , GEMINI Overview, GEMINI
+@subsection Using GEMINI Tables
+
+This section explains the various startup options you can use with
+@code{GEMINI} tables, how to back up @code{GEMINI} tables, some
+performance considerations and sample configurations, and a brief
+discussion of when to use @code{GEMINI} tables.
+
+Specifically, the topics covered in this section are:
+
+@itemize @bullet
+@item
+Startup Options
+@item
+Creating @code{GEMINI} Tables
+@item
+Backing Up @code{GEMINI} Tables
+@item
+Restoring @code{GEMINI} Tables
+@item
+Using Auto_Increment Columns With @code{GEMINI} Tables
+@item
+Performance Considerations
+@item
+Sample Configurations
+@item
+When To Use @code{GEMINI} Tables
+@end itemize
+
+@cindex GEMINI tables, startup options
+@menu
+* Startup Options::
+* Creating GEMINI Tables::
+* Backing Up GEMINI Tables::
+* Restoring GEMINI Tables::
+* Using Auto_Increment Columns With GEMINI Tables::
+* Performance Considerations::
+* Sample Configurations::
+* When To Use GEMINI Tables::
+@end menu
+
+@node Startup Options, Creating GEMINI Tables, Using GEMINI Tables, Using GEMINI Tables
+@subsubsection Startup Options
+
+The table below lists options to @code{mysqld} that can be used to
+change the behavior of @code{GEMINI} tables.
+
+@multitable @columnfractions .40 .60
+@item @sc{Option} @tab @sc{Description}
+
+@item
+@code{--default-table-type=gemini}
+@tab Sets the default table handler to be @code{GEMINI}. All create
+table statements will create @code{GEMINI} tables unless otherwise
+specified with @code{TYPE=@var{table-type}}. As noted above, there is
+currently a limitation with @code{TEMPORARY} tables using @code{GEMINI}.
+
+@item
+@code{--gemini-flush-log-at-commit}
+@tab Forces the recovery log buffers to be flushed after every
+commit. This can have a serious performance penalty, so use with
+caution.
+
+@item
+@code{--gemini-recovery=FULL | NONE | FORCE}
+@tab Sets the recovery mode. Default is @code{FULL}. @code{NONE} is
+useful for performing repeatable batch operations because the updates
+are not recorded in the recovery log. @code{FORCE} skips crash recovery
+upon startup; this corrupts the database, and should be used in
+emergencies only.
+
+@item
+@code{--gemini-unbuffered-io}
+@tab All database writes bypass the OS cache. This can provide a
+performance boost on heavily updated systems where most of the dataset
+being worked on is cached in memory with the @code{gemini_buffer_cache}
+parameter.
+
+@item
+@code{-O gemini_buffer_cache=size}
+@tab Amount of memory to allocate for database buffers, including index
+and record information. It is recommended that this number be 10% of the
+total size of all @code{GEMINI} tables. Do not exceed the amount of
+memory on the system!
+
+@item
+@code{-O gemini_connection_limit=#}
+@tab Maximum number of connections to @code{GEMINI}; default is
+@code{100}. Each connection consumes about 1K of memory.
+
+@item
+@code{-O gemini_io_threads=#}
+@tab Number of background I/O threads; default is @code{2}. Increase the
+number when using @code{--gemini-unbuffered-io}.
+
+@item
+@code{-O gemini_lock_table_size=#}
+@tab Sets the maximum number of concurrent locks; default is 4096. Using
+@code{SET [ GLOBAL | SESSION ] TRANSACTION ISOLATION = ...} will
+determine how long a program will hold row locks.
+
+@item
+@code{-O gemini_lock_wait_timeout=seconds}
+@tab Number of seconds to wait for record locks when performing queries;
+default is 10 seconds. Using @code{SET [ GLOBAL | SESSION ] TRANSACTION
+ISOLATION = ...} will determine how long a program will hold row locks.
+
+@item
+@code{--skip-gemini}
+@tab Do not use @code{GEMINI}. If you use @code{--skip-gemini}, @strong{MySQL}
+will not initialize the @code{GEMINI} table handler, saving memory; you
+cannot use @code{GEMINI} tables if you use @code{--skip-gemini}.
+
+@item
+@code{--transaction-isolation=READ-UNCOMMITTED | READ-COMMITTED | REPEATABLE-READ | SERIALIZABLE}
+@tab Sets the GLOBAL transaction isolation level for all users that
+connect to the server; can be overridden with the @code{SET TRANSACTION
+ISOLATION LEVEL} statement.
+@end multitable
+
+@cindex GEMINI tables, creating
+@node Creating GEMINI Tables, Backing Up GEMINI Tables, Startup Options, Using GEMINI Tables
+@subsubsection Creating GEMINI Tables
+
+@code{GEMINI} tables can be created using either the @code{CREATE
+TABLE} syntax or the @code{ALTER TABLE} syntax.
+
+@itemize @bullet
+@item
+The syntax for creating a @code{GEMINI} table is:
+
+@example
+CREATE TABLE @var{table-name} (....) TYPE=GEMINI;
+@end example
+
+@item
+The syntax to convert a table to @code{GEMINI} is:
+
+@example
+ALTER TABLE @var{table-name} TYPE=GEMINI;
+@end example
+@end itemize
+
+@xref{Tutorial}, for more information on how to create and use
+@code{MySQL} tables.
+
+@cindex GEMINI tables, backing up
+@node Backing Up GEMINI Tables, Restoring GEMINI Tables, Creating GEMINI Tables, Using GEMINI Tables
+@subsubsection Backing Up GEMINI Tables
+
+@code{GEMINI} supports both @code{BACKUP TABLE} and @code{RESTORE TABLE}
+syntax. To learn more about how to use @code{BACKUP} and @code{RESTORE},
+see @ref{BACKUP TABLE} and @ref{RESTORE TABLE}.
+
+To back up @code{GEMINI} tables outside of the @code{MySQL} environment,
+you must first shut down the @code{MySQL} server. Once the server is
+shut down, you can copy the files associated with @code{GEMINI} to a
+different location. The files that make up the @code{GEMINI} table
+handler are:
+
+@itemize @bullet
+@item
+All files associated with a table with a @code{.gmd} extension below the
+@code{$DATADIR} directory. Such files include @code{@var{table}.gmd},
+@code{@var{table}.gmi}, and @code{@var{table}.frm}.
+@item
+@code{gemini.db} in the @code{$DATADIR} directory
+@item
+@code{gemini.rl} in the @code{$DATADIR} directory
+@item
+@code{gemini.lg} in the @code{$DATADIR} directory
+@end itemize
+
+All the @code{GEMINI} files must be copied together. You cannot copy
+just the @code{.gmi} and @code{.gmd} files to a different
+@code{$DATADIR} and have them become part of a new database. You can
+copy an entire @code{$DATADIR} directory to another location and start a
+@strong{MySQL} server using the new @code{$DATADIR}.
+
+@cindex GEMINI tables, restoring
+@node Restoring GEMINI Tables, Using Auto_Increment Columns With GEMINI Tables, Backing Up GEMINI Tables, Using GEMINI Tables
+@subsubsection Restoring GEMINI Tables
+
+To restore @code{GEMINI} tables outside of the @code{MySQL} environment,
+you must first shut down the @code{MySQL} server. Once the server is
+shut down, you can remove all @code{GEMINI} files in the target
+@code{$DATADIR} and then copy the files previously backed up into the
+@code{$DATADIR} directory.
+
+As mentioned above, the files that make up the @code{GEMINI} table
+handler are:
+
+@itemize @bullet
+@item
+All files associated with a table with a @code{.gmd} extension below the
+@code{$DATADIR} directory. Such files include @code{@var{table}.gmd},
+@code{@var{table}.gmi}, and @code{@var{table}.frm}.
+@item
+@code{gemini.db} in the @code{$DATADIR} directory
+@item
+@code{gemini.rl} in the @code{$DATADIR} directory
+@item
+@code{gemini.lg} in the @code{$DATADIR} directory
+@end itemize
+
+When restoring a table, all the @code{GEMINI} files must be copied
+together. You cannot restore just the @code{.gmi} and @code{.gmd}
+files.
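+
+For example, a SQL-level backup and restore with the statements
+mentioned above (the table name and backup directory are only
+illustrations):
+
+@example
+mysql> BACKUP TABLE account TO '/backup/gemini';
+mysql> RESTORE TABLE account FROM '/backup/gemini';
+@end example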
+
+@cindex GEMINI tables, auto_increment
+@node Using Auto_Increment Columns With GEMINI Tables, Performance Considerations, Restoring GEMINI Tables, Using GEMINI Tables
+@subsubsection Using Auto_Increment Columns With GEMINI Tables
+
+As mentioned previously, @code{GEMINI} tables support row-level and
+table-level locking to increase concurrency in applications and to allow
+reading of tables without locking for maximum concurrency in heavy
+update environments. This feature has several implications when working
+with @code{auto_increment} tables.
+
+In @code{MySQL}, when a column is defined as an @code{auto_increment}
+column, and a row is inserted into the table with a @code{NULL} for the
+column, the @code{auto_increment} column is updated to be 1 higher than
+the highest value in the column.
+
+With @code{MyISAM} tables, the @code{auto_increment} function is
+implemented by looking in the index, finding the highest value, and
+adding 1 to it. This is possible because the entire @code{ISAM} table is
+locked during the update period and the increment value is therefore
+guaranteed not to change.
+
+With @code{GEMINI} tables, the @code{auto_increment} function is
+implemented by maintaining a counter in a separate location from the
+table data. Instead of looking at the highest value in the table index,
+@code{GEMINI} tables look at this separately maintained counter. This
+means that in a transactional model, unlike the bottleneck inherent in
+the @code{MyISAM} approach, @code{GEMINI} users do @b{not} have to wait
+until the transaction that added the last value either commits or
+rolls back before looking at the value.
+
+Two side-effects of the @code{GEMINI} implementation are:
+
+@itemize @bullet
+@item
+If an insert is done where the column with the @code{auto_increment} is
+specified, and this specified value is the highest value, @code{MyISAM}
+uses it as its @code{auto_increment} value, and every subsequent insert
+is based on this. By contrast, @code{GEMINI} does not use this value,
+but instead uses the value maintained in the separate @code{GEMINI}
+counter location.
+
+@item
+To set the counter to a specific value, you can use @code{SET
+insert_id=#} and insert a new row in the table. However, as a general
+rule, values should not be inserted into an @code{auto_increment}
+column; the database manager should be maintaining this field, not the
+application. @code{SET insert_id} is a recovery mechanism that should be
+used in case of error only.
+@end itemize
+
+Note that if you delete the row containing the maximum value for an
+@code{auto_increment} column, the value will be reused with a
+@code{GEMINI} table but not with a @code{MyISAM} table.
+
+See @ref{CREATE TABLE} for more information about creating
+@code{auto_increment} columns.
+
+@cindex GEMINI tables, performance considerations
+@node Performance Considerations, Sample Configurations, Using Auto_Increment Columns With GEMINI Tables, Using GEMINI Tables
+@subsubsection Performance Considerations
+
+In addition to designing the best possible application, configuration of
+the data and the server startup parameters need to be considered. How
+the hardware is being used can have a dramatic effect on how fast the
+system will respond to queries. Disk drives and memory must both be
+considered.
+
+@noindent
+@strong{Disk Drives}
+
+For best performance, you want to spread the data out over as many disks
+as possible. Using RAID 10 stripes works very well.
+If there are a lot of updates, then the recovery log
+(@code{gemini.rl}) should be on a relatively quiet disk drive.
+
+To spread the data out without using RAID 10, you can do the following:
+
+@itemize @bullet
+@item
+Group all the tables into three categories: Heavy Use, Moderate Use,
+Light Use.
+
+@item
+Take the number of disk drives available and use a round-robin approach
+to assign the three categories of tables to the drives. The result
+will be an equal distribution of Heavy/Moderate/Light tables assigned to
+each disk drive.
+
+@item
+Once the tables have been converted to @code{GEMINI} by using
+@code{ALTER TABLE @var{table-name} TYPE=GEMINI} statements, move
+(@code{mv}) the @code{.gmd} and @code{.gmi} files to a different disk
+drive and link (@code{ln -s}) them back to the original directory where
+the @code{.frm} file resides.
+
+@item
+Finally, move the @code{gemini.rl} file to its quiet disk location and link
+the file back to the @code{$DATADIR} directory.
+@end itemize
+
+@noindent
+@strong{Memory}
+
+The more data that can be placed in memory, the faster the access to the
+data. Figure out how large the @code{GEMINI} data is by adding up the
+@code{.gmd} and @code{.gmi} file sizes. If you can, put at least 10% of
+the data into memory. You allocate memory for the rows and indexes by
+using the @code{gemini_buffer_cache} startup parameter. For example:
+
+@example
+mysqld -O gemini_buffer_cache=800M
+@end example
+
+@noindent
+would allocate 800 MB of memory for the @code{GEMINI} buffer cache.
+
+@cindex GEMINI tables, sample configurations
+@node Sample Configurations, When To Use GEMINI Tables, Performance Considerations, Using GEMINI Tables
+@subsubsection Sample Configurations
+
+Based on the performance considerations above, we can look at some
+examples of how to get the best performance out of the system when
+using @code{GEMINI} tables.
+
+@multitable @columnfractions .30 .70
+@item @sc{Hardware} @tab @sc{Configuration}
+@item
+One CPU, 128MB memory, one disk drive
+@tab Allocate 80MB of memory for reading and updating @code{GEMINI}
+tables by starting the @code{mysqld} server with the following option:
+
+@example
+-O gemini_buffer_cache=80M
+@end example
+
+@item
+Two CPUs, 512MB memory, four disk drives
+@tab Use RAID 10 to stripe the data across all available disks, or use
+the method described in the performance considerations section,
+above. Allocate 450MB of memory for reading/updating @code{GEMINI}
+tables:
+
+@example
+-O gemini_buffer_cache=450M
+@end example
+@end multitable
+
+@cindex GEMINI tables, when to use
+@node When To Use GEMINI Tables, , Sample Configurations, Using GEMINI Tables
+@subsubsection When To Use GEMINI Tables
+
+Because the @code{GEMINI} table handler provides crash recovery and
+transaction support, there is extra overhead that is not found in other
+non-transaction-safe table handlers. Here are some general guidelines
+for when to employ @code{GEMINI} and when to use other
+non-transaction-safe tables (@code{NTST}).
+
+Note that in the following table, you could use InnoDB or BDB tables
+instead of @code{GEMINI}.
+
+@multitable @columnfractions .30 .25 .45
+@item
+@sc{Access Trends} @tab @sc{Table Type} @tab @sc{Reason}
+@item
+Read-only
+@tab @code{NTST}
+@tab Less overhead and faster
+@item
+Critical data
+@tab @code{GEMINI}
+@tab Crash recovery protection
+@item
+High concurrency
+@tab @code{GEMINI}
+@tab Row-level locking
+@item
+Heavy update
+@tab @code{GEMINI}
+@tab Row-level locking
+@end multitable
+
+The table below shows how a typical application schema could be defined.
+
+@multitable @columnfractions .15 .30 .25 .30
+@item
+@sc{Table} @tab @sc{Contents} @tab @sc{Table Type} @tab @sc{Reason}
+@item
+account
+@tab Customer account data
+@tab @code{GEMINI}
+@tab Critical data, heavy update
+@item
+order
+@tab Orders for a customer
+@tab @code{GEMINI}
+@tab Critical data, heavy update
+@item
+orderline
+@tab Orderline detail for an order
+@tab @code{GEMINI}
+@tab Critical data, heavy update
+@item
+invdesc
+@tab Inventory description
+@tab @code{NTST}
+@tab Read-only, frequent access
+@item
+salesrep
+@tab Sales rep information
+@tab @code{NTST}
+@tab Infrequent update
+@item
+inventory
+@tab Inventory information
+@tab @code{GEMINI}
+@tab High concurrency, critical data
+@item
+config
+@tab System configuration
+@tab @code{NTST}
+@tab Read-only
+@end multitable
 
 @cindex tutorial
 @cindex terminal monitor, defined
@@ -29259,8 +29346,9 @@ your changes with the new @file{errmsg.txt} file.
 @node Character sets, Adding character set, Languages, Languages
 @subsection The Character Set Used for Data and Sorting
 
-By default, @strong{MySQL} uses the ISO-8859-1 (Latin1) character
-set. This is the character set used in the USA and western Europe.
+By default, @strong{MySQL} uses the ISO-8859-1 (Latin1) character set
+with sorting according to Swedish/Finnish rules. This character set is
+suitable for the USA and western Europe.
 
 All standard @strong{MySQL} binaries are compiled with
 @code{--with-extra-charsets=complex}. This will add code to all
@@ -29272,12 +29360,12 @@ The character set determines what characters are allowed in names and how
 things are sorted by the @code{ORDER BY} and @code{GROUP BY} clauses of
 the @code{SELECT} statement.
 
-You can change the character set with the
-@code{--default-character-set} option when you start the server.
-The character sets available depend on the @code{--with-charset=charset}
-option to @code{configure}, and the character set configuration files
-listed in @file{SHAREDIR/charsets/Index}.
-@xref{Quick install}.
+You can change the character set with the @code{--default-character-set}
+option when you start the server. The character sets available depend
+on the @code{--with-charset=charset} and
+@code{--with-extra-charset=list-of-charset | complex | all} options to
+@code{configure}, and the character set configuration files listed in
+@file{SHAREDIR/charsets/Index}. @xref{configure options}.
 
 If you change the character set when running @strong{MySQL} (which may
 also change the sort order), you must run myisamchk -r -q on all
@@ -34834,6 +34922,10 @@ After you have installed the packed table into the @strong{MySQL} database
 directory you should do @code{mysqladmin flush-tables} to force @code{mysqld}
 to start using the new table.
 
+If you want to unpack a packed table, you can do this with the
+@code{--unpack} option to @code{isamchk} or @code{myisamchk}.
+
+
 @cindex installation maintenance
 @cindex maintaining, tables
 @cindex tables, maintaining
@@ -38166,6 +38258,9 @@ option.
 @node Communication errors, Full table, Packet too large, Common errors
 @subsection Communication Errors / Aborted Connection
 
+Starting with @code{MySQL 3.23.40}, you only get the @code{Aborted
+connection} error if you start @code{mysqld} with @code{--warnings}.
+
 If you find errors like the following in your error log.
 
 @example
@@ -38690,6 +38785,8 @@ shell> perror 23
 File table overflow
 shell> perror 24
 Too many open files
+shell> perror 11
+Resource temporarily unavailable
 @end example
 
 The problem here is that @code{mysqld} is trying to keep open too many
@@ -39207,8 +39304,8 @@ database directory. The @code{FLUSH TABLE} is needed to ensure that
 the all active index pages is written to disk before you start the backup.
 
 If you want to make a SQL level backup of a table, you can use
-@code{SELECT INTO OUTFILE} or @code{BACKUP
-TABLE}. @xref{SELECT}. @xref{BACKUP TABLE}.
+@code{SELECT INTO OUTFILE} or @code{BACKUP TABLE}. @xref{SELECT}.
+@xref{BACKUP TABLE}.
 
 Another way to back up a database is to use the @code{mysqldump} program
 or the @code{mysqlhotcopy script}. @xref{mysqldump, , @code{mysqldump}}.
@@ -41737,7 +41834,7 @@ specified explicitly.
 @item
 The @code{passwd} parameter contains the password for @code{user}. If
 @code{passwd} is @code{NULL}, only entries in the @code{user} table for the
-user that have a blank password field will be checked for a match. This
+user that have a blank (empty) password field will be checked for a match. This
 allows the database administrator to set up the @strong{MySQL} privilege
 system in such a way that users get different privileges depending on whether
 or not they have specified a password.
@@ -41770,7 +41867,7 @@ of the following flags in very special circumstances:
 @code{mysqld} to be more ODBC-friendly.
 @item @code{CLIENT_COMPRESS} @tab Use compression protocol.
 @item @code{CLIENT_FOUND_ROWS} @tab Return the number of found (matched) rows, not the number of affected rows.
-@item @code{CLIENT_IGNORE_SPACE} $tab Allow spaces after function names. Makes all functions names reserved words.
+@item @code{CLIENT_IGNORE_SPACE} @tab Allow spaces after function names. Makes all function names reserved words.
 @item @code{CLIENT_INTERACTIVE} @tab Allow @code{interactive_timeout} seconds (instead of @code{wait_timeout} seconds) of inactivity before closing the connection.
 @item @code{CLIENT_NO_SCHEMA} @tab Don't allow the @code{db_name.tbl_name.col_name} syntax. This is for ODBC. It causes the parser to generate an error if you use that syntax, which is useful for trapping bugs in some ODBC programs.
 @item @code{CLIENT_ODBC} @tab The client is an ODBC client. This changes
@@ -46247,9 +46344,25 @@ Our TODO section contains what we plan to have in 4.0. @xref{TODO MySQL 4.0}.
 
 @itemize @bullet
 @item
+Renamed @code{safe_mysqld} to @code{mysqld_safe}.
+@item
+Allow one to use @code{IN} instead of @code{FROM} in @code{SHOW} commands.
+@item
+@code{SHOW INDEXES} is now a synonym for @code{SHOW INDEX}.
+@item
 Added support for symbolic links to @code{MyISAM} tables. Symlink
 handling is now enabled by default for Windows.
 @item
+@code{LOAD DATA FROM MASTER} "auto-magically" sets up a slave.
+@item
+A new @code{HANDLER} interface to @code{MyISAM} tables.
+@item
+@code{COUNT(DISTINCT)} is about 30% faster.
+@item
+Creating full-text indexes is now much faster.
+@item
+Searching on packed (@code{CHAR}/@code{VARCHAR}) keys is now much faster.
+@item
 Added @code{SQL_CALC_FOUND_ROWS} and @code{FOUND_ROWS()}.
 This makes it possible to know how many rows a query would have
 returned without a @code{LIMIT} clause.
@@ -46310,7 +46423,7 @@ users use this code as the rest of the code and because of this we are
 not yet 100% confident in this code.
 
 @menu
-* News-3.23.39a::
+* News-3.23.40::                Changes in release 3.23.40
 * News-3.23.39::                Changes in release 3.23.39
 * News-3.23.38::                Changes in release 3.23.38
 * News-3.23.37::                Changes in release 3.23.37
@@ -46354,24 +46467,44 @@ not yet 100% confident in this code.
 * News-3.23.0::                 Changes in release 3.23.0
 @end menu
 
-@node News-3.23.39a, News-3.23.39, News-3.23.x, News-3.23.x
-@appendixsubsec Changes in release 3.23.39a
+@node News-3.23.40, News-3.23.39, News-3.23.x, News-3.23.x
+@appendixsubsec Changes in release 3.23.40
 @itemize @bullet
 @item
-Fixed a small bug in the mysql-test benchmark suite
+Fixed parser to allow floats of type @code{1.0e1} (no sign after @code{e}).
+@item
+Option @code{--force} to @code{myisamchk} now also updates states.
+@item
+Added option @code{--warnings} to @code{mysqld}. Now @code{mysqld}
+only prints the error @code{Aborted connection} if this option is used.
+@item
+Fixed problem with @code{SHOW CREATE TABLE} when you didn't have a
+@code{PRIMARY KEY}.
+@item
+Properly fixed the rename of @code{innodb_unix_file_flush_method} to
+@code{innodb_flush_method}.
+@item
+Fixed bug when converting @code{UNSIGNED BIGINT} to @code{DOUBLE}. This caused
+a problem when doing comparisons with @code{BIGINT}s outside of the
+signed range.
+@item
+Fixed bug in @code{BDB} tables when querying empty tables.
+@item
+Fixed a bug when using @code{COUNT(DISTINCT)} with @code{LEFT JOIN} and
+there weren't any matching rows.
 @end itemize
 
-@node News-3.23.39, News-3.23.38, News-3.23.39a, News-3.23.x
+@node News-3.23.39, News-3.23.38, News-3.23.40, News-3.23.x
 @appendixsubsec Changes in release 3.23.39
 @itemize @bullet
 @item
-If one dropped and added an @code{AUTO_INCREMENT} column, the
-@code{AUTO_INCREMENT} sequence wasn't reset.
+The @code{AUTO_INCREMENT} sequence wasn't reset when dropping
+and adding an @code{AUTO_INCREMENT} column.
 @item
-@code{CREATE .. SELECT} now creates not unique indexes delayed.
+@code{CREATE ... SELECT} now creates non-unique indexes delayed.
 @item
 Fixed problem where @code{LOCK TABLES table_name READ} followed by
-@code{FLUSH TABLES} put a exclusive lock on the table.
+@code{FLUSH TABLES} put an exclusive lock on the table.
 @item
 @code{REAL} @@variables with was represented with 2 digits when
 converted to strings.
@@ -46383,8 +46516,8 @@ that only had the open count wrong.
 @item
 Added functions to handle symbolic links to make life easier in 4.0.
 @item
-We are now using the @code{-lcma} thread library on HP-UX 10.20 to
-get @strong{MySQL} more stable on HP-UX.
+We are now using the @code{-lcma} thread library on HP-UX 10.20 so
+that @strong{MySQL} will be more stable on HP-UX.
 @item
 Fixed problem with @code{IF()} and number of decimals in the result.
@item diff --git a/client/mysqlcheck.c b/client/mysqlcheck.c index a9379837847..ebaa8366c72 100644 --- a/client/mysqlcheck.c +++ b/client/mysqlcheck.c @@ -16,7 +16,7 @@ /* By Jani Tolonen, 2001-04-20, MySQL Development Team */ -#define CHECK_VERSION "1.01" +#define CHECK_VERSION "1.02" #include <global.h> #include <my_sys.h> @@ -503,25 +503,24 @@ static int use_db(char *database) static int handle_request_for_tables(char *tables, uint length) { - char *query, *end, options[100]; + char *query, *end, options[100], message[100]; const char *op = 0; options[0] = 0; + end = options; switch (what_to_do) { case DO_CHECK: op = "CHECK"; - end = options; - if (opt_quick) end = strmov(end, "QUICK"); - if (opt_fast) end = strmov(end, "FAST"); - if (opt_medium_check) end = strmov(end, "MEDIUM"); /* Default */ - if (opt_extended) end = strmov(end, "EXTENDED"); - if (opt_check_only_changed) end = strmov(end, "CHANGED"); + if (opt_quick) end = strmov(end, " QUICK"); + if (opt_fast) end = strmov(end, " FAST"); + if (opt_medium_check) end = strmov(end, " MEDIUM"); /* Default */ + if (opt_extended) end = strmov(end, " EXTENDED"); + if (opt_check_only_changed) end = strmov(end, " CHANGED"); break; case DO_REPAIR: op = "REPAIR"; - end = options; - if (opt_quick) end = strmov(end, "QUICK"); - if (opt_extended) end = strmov(end, "EXTENDED"); + if (opt_quick) end = strmov(end, " QUICK"); + if (opt_extended) end = strmov(end, " EXTENDED"); break; case DO_ANALYZE: op = "ANALYZE"; @@ -533,11 +532,11 @@ static int handle_request_for_tables(char *tables, uint length) if (!(query =(char *) my_malloc((sizeof(char)*(length+110)), MYF(MY_WME)))) return 1; - sprintf(query, "%s TABLE %s %s", op, options, tables); + sprintf(query, "%s TABLE %s %s", op, tables, options); if (mysql_query(sock, query)) { - sprintf(options, "when executing '%s TABLE'", op); - DBerror(sock, options); + sprintf(message, "when executing '%s TABLE ... %s", op, options); + DBerror(sock, message); return 1; } print_result(); @@ -551,23 +550,34 @@ static void print_result() MYSQL_RES *res; MYSQL_ROW row; char prev[NAME_LEN*2+2]; - int i; + uint i; + my_bool found_error=0; res = mysql_use_result(sock); prev[0] = '\0'; for (i = 0; (row = mysql_fetch_row(res)); i++) { int changed = strcmp(prev, row[0]); - int status = !strcmp(row[2], "status"); - if (opt_silent && status) - continue; + my_bool status = !strcmp(row[2], "status"); + + if (status) + { + if (found_error) + { + if (what_to_do != DO_REPAIR && opt_auto_repair && + (!opt_fast || strcmp(row[3],"OK"))) + insert_dynamic(&tables4repair, row[0]); + } + found_error=0; + if (opt_silent) + continue; + } if (status && changed) printf("%-50s %s", row[0], row[3]); else if (!status && changed) { printf("%s\n%-9s: %s", row[0], row[2], row[3]); - if (what_to_do != DO_REPAIR && opt_auto_repair) - insert_dynamic(&tables4repair, row[0]); + found_error=1; } else printf("%-9s: %s", row[2], row[3]); diff --git a/configure.in b/configure.in index 1b95a5d5800..37bc22ff63e 100644 --- a/configure.in +++ b/configure.in @@ -4,7 +4,7 @@ dnl Process this file with autoconf to produce a configure script. AC_INIT(sql/mysqld.cc) AC_CANONICAL_SYSTEM # The Docs Makefile.am parses this line! 
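Returning to the mysqlcheck.c hunk above: the reason each option string now carries a leading space is visible in the strmov() chaining, where `end' always points at the terminating NUL so successive options concatenate in one pass. A standalone sketch of the idiom, with a local stand-in for the mysys strmov() (assumed, not shown, in the diff):

/* strmov() returns a pointer to the copied string's NUL, so appends
   can be chained without re-scanning the buffer each time. */
#include <stdio.h>

static char *my_strmov(char *dst, const char *src)
{
  while ((*dst = *src++))
    dst++;
  return dst;            /* points at the '\0', ready for the next append */
}

int main(void)
{
  char options[100], *end = options;
  int opt_quick = 1, opt_extended = 1;

  options[0] = 0;
  if (opt_quick)    end = my_strmov(end, " QUICK");
  if (opt_extended) end = my_strmov(end, " EXTENDED");

  printf("CHECK TABLE t1%s\n", options);  /* CHECK TABLE t1 QUICK EXTENDED */
  return 0;
}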
-AM_INIT_AUTOMAKE(mysql, 3.23.39a) +AM_INIT_AUTOMAKE(mysql, 3.23.39) AM_CONFIG_HEADER(config.h) PROTOCOL_VERSION=10 diff --git a/include/mysqld_error.h b/include/mysqld_error.h index e412f95a8e4..cfcf7672013 100644 --- a/include/mysqld_error.h +++ b/include/mysqld_error.h @@ -208,4 +208,6 @@ #define ER_LOCK_WAIT_TIMEOUT 1205 #define ER_LOCK_TABLE_FULL 1206 #define ER_READ_ONLY_TRANSACTION 1207 -#define ER_ERROR_MESSAGES 208 +#define ER_DROP_DB_WITH_READ_LOCK 1208 +#define ER_CREATE_DB_WITH_READ_LOCK 1209 +#define ER_ERROR_MESSAGES 210 diff --git a/innobase/btr/btr0pcur.c b/innobase/btr/btr0pcur.c index 0388785b3fe..5e625553929 100644 --- a/innobase/btr/btr0pcur.c +++ b/innobase/btr/btr0pcur.c @@ -246,6 +246,12 @@ btr_pcur_restore_position( && btr_pcur_is_on_user_rec(cursor, mtr) && (0 == cmp_dtuple_rec(tuple, btr_pcur_get_rec(cursor)))) { + /* We have to store the NEW value for the modify clock, since + the cursor can now be on a different page! */ + + cursor->modify_clock = buf_frame_get_modify_clock( + buf_frame_align( + btr_pcur_get_rec(cursor))); mem_heap_free(heap); return(TRUE); diff --git a/innobase/buf/buf0buf.c b/innobase/buf/buf0buf.c index 0046a3761a6..ede9e621462 100644 --- a/innobase/buf/buf0buf.c +++ b/innobase/buf/buf0buf.c @@ -241,6 +241,8 @@ buf_block_init( block->modify_clock = ut_dulint_zero; + block->file_page_was_freed = FALSE; + rw_lock_create(&(block->lock)); ut_ad(rw_lock_validate(&(block->lock))); @@ -543,6 +545,64 @@ buf_page_peek( } /************************************************************************ +Sets file_page_was_freed TRUE if the page is found in the buffer pool. +This function should be called when we free a file page and want the +debug version to check that it is not accessed any more unless +reallocated. */ + +buf_block_t* +buf_page_set_file_page_was_freed( +/*=============================*/ + /* out: control block if found from page hash table, + otherwise NULL */ + ulint space, /* in: space id */ + ulint offset) /* in: page number */ +{ + buf_block_t* block; + + mutex_enter_fast(&(buf_pool->mutex)); + + block = buf_page_hash_get(space, offset); + + if (block) { + block->file_page_was_freed = TRUE; + } + + mutex_exit(&(buf_pool->mutex)); + + return(block); +} + +/************************************************************************ +Sets file_page_was_freed FALSE if the page is found in the buffer pool. +This function should be called when we free a file page and want the +debug version to check that it is not accessed any more unless +reallocated. */ + +buf_block_t* +buf_page_reset_file_page_was_freed( +/*===============================*/ + /* out: control block if found from page hash table, + otherwise NULL */ + ulint space, /* in: space id */ + ulint offset) /* in: page number */ +{ + buf_block_t* block; + + mutex_enter_fast(&(buf_pool->mutex)); + + block = buf_page_hash_get(space, offset); + + if (block) { + block->file_page_was_freed = FALSE; + } + + mutex_exit(&(buf_pool->mutex)); + + return(block); +} + +/************************************************************************ This is the general function used to get access to a database page. 
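Before the buf_page_get() code resumes below, the btr0pcur.c fix above deserves a sketch. A persistent cursor remembers a per-block modify clock and may reattach cheaply while that clock is unchanged; the bug was that a successful re-position by search did not refresh the stored clock, even though the record may have moved to another page. All names here are simplified stand-ins, not the real InnoDB types:

#include <assert.h>

typedef unsigned long dulint_sketch;

typedef struct {
  dulint_sketch modify_clock;       /* bumped on record-moving changes */
} buf_block_sketch;

typedef struct {
  buf_block_sketch *block;          /* block the cursor was left on */
  dulint_sketch     stored_clock;   /* clock value seen at that time */
} pcur_sketch;

/* store position: remember the block and its current clock */
static void pcur_store(pcur_sketch *cur, buf_block_sketch *blk)
{
  cur->block        = blk;
  cur->stored_clock = blk->modify_clock;
}

/* restore position: cheap if nothing moved; otherwise re-search and
   refresh the stored clock (the line added by the fix above) */
static void pcur_restore(pcur_sketch *cur, buf_block_sketch *found_on)
{
  if (cur->block == found_on && cur->stored_clock == found_on->modify_clock)
    return;                         /* optimistic restore succeeded */
  pcur_store(cur, found_on);        /* MUST re-read the NEW clock here */
}

int main(void)
{
  buf_block_sketch a = { 0 }, b = { 7 };
  pcur_sketch cur;
  pcur_store(&cur, &a);
  pcur_restore(&cur, &b);           /* record moved to another block */
  assert(cur.stored_clock == b.modify_clock);
  return 0;
}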
*/ buf_frame_t* @@ -646,6 +706,9 @@ loop: block->accessed = TRUE; +#ifdef UNIV_DEBUG_FILE_ACCESSES + ut_a(block->file_page_was_freed == FALSE); +#endif mutex_exit(&(buf_pool->mutex)); #ifdef UNIV_DEBUG @@ -842,6 +905,9 @@ buf_page_optimistic_get_func( ut_ad(block->buf_fix_count > 0); ut_ad(block->state == BUF_BLOCK_FILE_PAGE); +#ifdef UNIV_DEBUG_FILE_ACCESSES + ut_a(block->file_page_was_freed == FALSE); +#endif if (!accessed) { /* In the case of a first access, try to apply linear read-ahead */ @@ -949,6 +1015,9 @@ buf_page_get_known_nowait( #endif ut_ad(block->buf_fix_count > 0); ut_ad(block->state == BUF_BLOCK_FILE_PAGE); +#ifdef UNIV_DEBUG_FILE_ACCESSES + ut_a(block->file_page_was_freed == FALSE); +#endif #ifdef UNIV_IBUF_DEBUG ut_a((mode == BUF_KEEP_OLD) @@ -996,6 +1065,8 @@ buf_page_init( block->n_hash_helps = 0; block->is_hashed = FALSE; + + block->file_page_was_freed = FALSE; } /************************************************************************ @@ -1126,6 +1197,8 @@ buf_page_create( #ifdef UNIV_IBUF_DEBUG ut_a(ibuf_count_get(block->space, block->offset) == 0); #endif + block->file_page_was_freed = FALSE; + /* Page can be found in buf_pool */ mutex_exit(&(buf_pool->mutex)); diff --git a/innobase/buf/buf0flu.c b/innobase/buf/buf0flu.c index 90bdde1ebc6..7129b8d20a9 100644 --- a/innobase/buf/buf0flu.c +++ b/innobase/buf/buf0flu.c @@ -182,8 +182,8 @@ buf_flush_write_complete( buf_pool->LRU_flush_ended++; } -/* printf("n pending flush %lu\n", - buf_pool->n_flush[block->flush_type]); */ + /* printf("n pending flush %lu\n", + buf_pool->n_flush[block->flush_type]); */ if ((buf_pool->n_flush[block->flush_type] == 0) && (buf_pool->init_flush[block->flush_type] == FALSE)) { @@ -421,6 +421,8 @@ buf_flush_try_neighbors( /* In simulated aio we wake up the i/o-handler threads now that we have posted a batch of writes: */ + /* printf("Flush count %lu ; Waking i/o handlers\n", count); */ + os_aio_simulated_wake_handler_threads(); return(count); diff --git a/innobase/buf/buf0lru.c b/innobase/buf/buf0lru.c index 4626dc2757b..142beaaaa15 100644 --- a/innobase/buf/buf0lru.c +++ b/innobase/buf/buf0lru.c @@ -260,9 +260,9 @@ loop: */ if (n_iterations > 30) { fprintf(stderr, - "Innobase: Warning: difficult to find free blocks from\n" - "Innobase: the buffer pool! Consider increasing the\n" - "Innobase: buffer pool size.\n"); + "InnoDB: Warning: difficult to find free blocks from\n" + "InnoDB: the buffer pool (%lu search iterations)! 
Consider\n" + "InnoDB: increasing the buffer pool size.\n", n_iterations); } } diff --git a/innobase/buf/buf0rea.c b/innobase/buf/buf0rea.c index 644dd226a0e..728bf4404b8 100644 --- a/innobase/buf/buf0rea.c +++ b/innobase/buf/buf0rea.c @@ -18,6 +18,7 @@ Created 11/5/1995 Heikki Tuuri #include "log0recv.h" #include "trx0sys.h" #include "os0file.h" +#include "srv0start.h" /* The size in blocks of the area where the random read-ahead algorithm counts the accessed pages when deciding whether to read-ahead */ @@ -132,10 +133,16 @@ buf_read_ahead_random( ulint low, high; ulint i; - if (ibuf_bitmap_page(offset)) { + if (srv_startup_is_before_trx_rollback_phase) { + /* No read-ahead to avoid thread deadlocks */ + return(0); + } + + if (ibuf_bitmap_page(offset) || trx_sys_hdr_page(space, offset)) { - /* If it is an ibuf bitmap page, we do no read-ahead, as - that could break the ibuf page access order */ + /* If it is an ibuf bitmap page or trx sys hdr, we do + no read-ahead, as that could break the ibuf page access + order */ return(0); } @@ -301,9 +308,16 @@ buf_read_ahead_linear( ulint low, high; ulint i; - if (ibuf_bitmap_page(offset)) { - /* If it is an ibuf bitmap page, we do no read-ahead, as - that could break the ibuf page access order */ + if (srv_startup_is_before_trx_rollback_phase) { + /* No read-ahead to avoid thread deadlocks */ + return(0); + } + + if (ibuf_bitmap_page(offset) || trx_sys_hdr_page(space, offset)) { + + /* If it is an ibuf bitmap page or trx sys hdr, we do + no read-ahead, as that could break the ibuf page access + order */ return(0); } diff --git a/innobase/dict/dict0boot.c b/innobase/dict/dict0boot.c index 260e8d4c276..35fdfce16a6 100644 --- a/innobase/dict/dict0boot.c +++ b/innobase/dict/dict0boot.c @@ -313,6 +313,11 @@ dict_boot(void) mtr_commit(&mtr); /*-------------------------*/ + + /* Initialize the insert buffer table and index for each tablespace */ + + ibuf_init_at_db_start(); + /* Load definitions of other indexes on system tables */ dict_load_sys_table(dict_sys->sys_tables); @@ -320,10 +325,6 @@ dict_boot(void) dict_load_sys_table(dict_sys->sys_indexes); dict_load_sys_table(dict_sys->sys_fields); - /* Initialize the insert buffer table and index for each tablespace */ - - ibuf_init_at_db_start(); - mutex_exit(&(dict_sys->mutex)); } diff --git a/innobase/fsp/fsp0fsp.c b/innobase/fsp/fsp0fsp.c index 095ca497ee2..101fb5f3ba0 100644 --- a/innobase/fsp/fsp0fsp.c +++ b/innobase/fsp/fsp0fsp.c @@ -127,11 +127,9 @@ typedef byte fseg_inode_t; page number within space, FIL_NULL means that the slot is not in use */ /*-------------------------------------*/ -#define FSEG_INODE_SIZE (16 + 3 * FLST_BASE_NODE_SIZE +\ - FSEG_FRAG_ARR_N_SLOTS * FSEG_FRAG_SLOT_SIZE) +#define FSEG_INODE_SIZE (16 + 3 * FLST_BASE_NODE_SIZE + FSEG_FRAG_ARR_N_SLOTS * FSEG_FRAG_SLOT_SIZE) -#define FSP_SEG_INODES_PER_PAGE ((UNIV_PAGE_SIZE - FSEG_ARR_OFFSET - 10)\ - / FSEG_INODE_SIZE) +#define FSP_SEG_INODES_PER_PAGE ((UNIV_PAGE_SIZE - FSEG_ARR_OFFSET - 10) / FSEG_INODE_SIZE) /* Number of segment inodes which fit on a single page */ @@ -198,8 +196,7 @@ the extent are free and which contain old tuple version to clean. */ /* File extent data structure size in bytes. The "+ 7 ) / 8" part in the definition rounds the number of bytes upward. 
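The rounding just described is ordinary ceiling division; a tiny standalone check of the idiom (the 64 pages per extent and 2 descriptor bits per page are the usual values of FSP_EXTENT_SIZE and XDES_BITS_PER_PAGE, assumed here only for illustration):

#include <assert.h>

static unsigned bits_to_bytes(unsigned n_bits)
{
  return (n_bits + 7) / 8;   /* ceiling division: 1..8 bits -> 1 byte */
}

int main(void)
{
  assert(bits_to_bytes(1) == 1);
  assert(bits_to_bytes(8) == 1);
  assert(bits_to_bytes(9) == 2);
  /* e.g. 64 pages x 2 descriptor bits = 128 bits -> 16 bytes */
  assert(bits_to_bytes(64 * 2) == 16);
  return 0;
}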
*/ -#define XDES_SIZE (XDES_BITMAP +\ - (FSP_EXTENT_SIZE * XDES_BITS_PER_PAGE + 7) / 8) +#define XDES_SIZE (XDES_BITMAP + (FSP_EXTENT_SIZE * XDES_BITS_PER_PAGE + 7) / 8) /* Offset of the descriptor array on a descriptor page */ #define XDES_ARR_OFFSET (FSP_HEADER_OFFSET + FSP_HEADER_SIZE) @@ -2539,6 +2536,10 @@ fseg_free_page( seg_inode = fseg_inode_get(seg_header, mtr); fseg_free_page_low(seg_inode, space, page, mtr); + +#ifdef UNIV_DEBUG_FILE_ACCESSES + buf_page_set_file_page_was_freed(space, page); +#endif } /************************************************************************** @@ -2602,6 +2603,14 @@ fseg_free_extent( } fsp_free_extent(space, page, mtr); + +#ifdef UNIV_DEBUG_FILE_ACCESSES + for (i = 0; i < FSP_EXTENT_SIZE; i++) { + + buf_page_set_file_page_was_freed(space, + first_page_in_extent + i); + } +#endif } /************************************************************************** @@ -2633,6 +2642,14 @@ fseg_free_step( MTR_MEMO_X_LOCK)); mtr_x_lock(fil_space_get_latch(space), mtr); + descr = xdes_get_descriptor(space, buf_frame_get_page_no(header), mtr); + + /* Check that the header resides on a page which has not been + freed yet */ + + ut_a(descr); + ut_a(xdes_get_bit(descr, XDES_FREE_BIT, buf_frame_get_page_no(header) + % FSP_EXTENT_SIZE, mtr) == FALSE); inode = fseg_inode_get(header, mtr); descr = fseg_get_first_extent(inode, mtr); @@ -2647,7 +2664,6 @@ fseg_free_step( } /* Free a frag page */ - n = fseg_find_last_used_frag_page_slot(inode, mtr); if (n == ULINT_UNDEFINED) { @@ -2659,6 +2675,16 @@ fseg_free_step( fseg_free_page_low(inode, space, fseg_get_nth_frag_page_no(inode, n, mtr), mtr); + + n = fseg_find_last_used_frag_page_slot(inode, mtr); + + if (n == ULINT_UNDEFINED) { + /* Freeing completed: free the segment inode */ + fsp_free_seg_inode(space, inode, mtr); + + return(TRUE); + } + return(FALSE); } diff --git a/innobase/ibuf/ibuf0ibuf.c b/innobase/ibuf/ibuf0ibuf.c index 7227b54e71e..171c6169927 100644 --- a/innobase/ibuf/ibuf0ibuf.c +++ b/innobase/ibuf/ibuf0ibuf.c @@ -1382,6 +1382,9 @@ ibuf_remove_free_page( fseg_free_page(header_page + IBUF_HEADER + IBUF_TREE_SEG_HEADER, space, page_no, &mtr); +#ifdef UNIV_DEBUG_FILE_ACCESSES + buf_page_reset_file_page_was_freed(space, page_no); +#endif ibuf_enter(); mutex_enter(&ibuf_mutex); @@ -1413,6 +1416,9 @@ ibuf_remove_free_page( ibuf_bitmap_page_set_bits(bitmap_page, page_no, IBUF_BITMAP_IBUF, FALSE, &mtr); +#ifdef UNIV_DEBUG_FILE_ACCESSES + buf_page_set_file_page_was_freed(space, page_no); +#endif mtr_commit(&mtr); mutex_exit(&ibuf_mutex); @@ -2431,6 +2437,8 @@ ibuf_merge_or_delete_for_page( block = buf_block_align(page); rw_lock_x_lock_move_ownership(&(block->lock)); + + ut_a(fil_page_get_type(page) == FIL_PAGE_INDEX); } n_inserts = 0; diff --git a/innobase/include/buf0buf.h b/innobase/include/buf0buf.h index 5e90f5952fc..7f3e20a4505 100644 --- a/innobase/include/buf0buf.h +++ b/innobase/include/buf0buf.h @@ -293,6 +293,32 @@ buf_page_peek_block( ulint space, /* in: space id */ ulint offset);/* in: page number */ /************************************************************************ +Sets file_page_was_freed TRUE if the page is found in the buffer pool. +This function should be called when we free a file page and want the +debug version to check that it is not accessed any more unless +reallocated. 
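The two functions being declared here are the heart of the new UNIV_DEBUG_FILE_ACCESSES checking that runs through this whole patch. The overall pattern, shown with simplified stand-in types rather than the real InnoDB API: the space-management layer marks a block as freed, and in debug builds every buffer-pool access path asserts the mark is clear, so nobody touches a freed page before it is reallocated.

#define UNIV_DEBUG_FILE_ACCESSES       /* normally set in univ.i */
#include <assert.h>

typedef struct {
  int file_page_was_freed;             /* set when fsp frees the page */
} block_sketch;

static void fseg_free_page_sketch(block_sketch *block)
{
  block->file_page_was_freed = 1;      /* buf_page_set_file_page_was_freed */
}

static void buf_page_get_sketch(block_sketch *block)
{
#ifdef UNIV_DEBUG_FILE_ACCESSES
  assert(block->file_page_was_freed == 0);  /* freed page: refuse access */
#endif
}

int main(void)
{
  block_sketch blk = { 0 };
  buf_page_get_sketch(&blk);           /* fine: page still allocated */
  fseg_free_page_sketch(&blk);
  /* calling buf_page_get_sketch(&blk) now would trip the assertion,
     which is exactly the accident the debug flag is meant to catch */
  return 0;
}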
*/ + +buf_block_t* +buf_page_set_file_page_was_freed( +/*=============================*/ + /* out: control block if found from page hash table, + otherwise NULL */ + ulint space, /* in: space id */ + ulint offset); /* in: page number */ +/************************************************************************ +Sets file_page_was_freed FALSE if the page is found in the buffer pool. +This function should be called when we free a file page and want the +debug version to check that it is not accessed any more unless +reallocated. */ + +buf_block_t* +buf_page_reset_file_page_was_freed( +/*===============================*/ + /* out: control block if found from page hash table, + otherwise NULL */ + ulint space, /* in: space id */ + ulint offset); /* in: page number */ +/************************************************************************ Recommends a move of a block to the start of the LRU list if there is danger of dropping from the buffer pool. NOTE: does not reserve the buffer pool mutex. */ @@ -706,6 +732,9 @@ struct buf_block_struct{ which bufferfixes the block acquires an s-latch here; so we can use the debug utilities in sync0rw */ + ibool file_page_was_freed; + /* this is set to TRUE when fsp + frees a page in buffer pool */ }; /* The buffer pool structure. NOTE! The definition appears here only for diff --git a/innobase/include/srv0start.h b/innobase/include/srv0start.h index 66eeb4f2e3c..e2b20f3b5fc 100644 --- a/innobase/include/srv0start.h +++ b/innobase/include/srv0start.h @@ -28,4 +28,7 @@ int innobase_shutdown_for_mysql(void); /*=============================*/ /* out: DB_SUCCESS or error code */ + +extern ibool srv_startup_is_before_trx_rollback_phase; + #endif diff --git a/innobase/include/sync0sync.ic b/innobase/include/sync0sync.ic index b58d024bf6c..f7b341cb386 100644 --- a/innobase/include/sync0sync.ic +++ b/innobase/include/sync0sync.ic @@ -86,7 +86,7 @@ mutex_test_and_set( /* mutex_fence(); */ return(res); -#elif defined(__GNUC__) && defined(UNIV_INTEL_X86) +#elif defined(not_defined) && defined(__GNUC__) && defined(UNIV_INTEL_X86) ulint* lw; ulint res; @@ -134,7 +134,7 @@ mutex_reset_lock_word( __asm MOV EDX, 0 __asm MOV ECX, lw __asm XCHG EDX, DWORD PTR [ECX] -#elif defined(__GNUC__) && defined(UNIV_INTEL_X86) +#elif defined(not_defined) && defined(__GNUC__) && defined(UNIV_INTEL_X86) ulint* lw; lw = &(mutex->lock_word); diff --git a/innobase/include/univ.i b/innobase/include/univ.i index d29ca83b0fc..73bf48b1bc0 100644 --- a/innobase/include/univ.i +++ b/innobase/include/univ.i @@ -9,6 +9,8 @@ Created 1/20/1994 Heikki Tuuri #ifndef univ_i #define univ_i +#undef UNIV_INTEL_X86 + #if (defined(_WIN32) || defined(_WIN64)) && !defined(MYSQL_SERVER) #define __WIN__ #include <windows.h> @@ -72,6 +74,8 @@ subdirectory of 'mysql'. 
*/ #define UNIV_SYNC_PERF_STAT #define UNIV_SEARCH_PERF_STAT + +#define UNIV_DEBUG_FILE_ACCESSES */ #define UNIV_LIGHT_MEM_DEBUG diff --git a/innobase/log/log0recv.c b/innobase/log/log0recv.c index e93cd3f0364..d16085a2d6f 100644 --- a/innobase/log/log0recv.c +++ b/innobase/log/log0recv.c @@ -944,9 +944,9 @@ recv_read_in_area( } buf_read_recv_pages(FALSE, space, page_nos, n); - - /* printf("Recv pages at %lu n %lu\n", page_nos[0], n); */ - + /* + printf("Recv pages at %lu n %lu\n", page_nos[0], n); + */ return(n); } diff --git a/innobase/os/os0file.c b/innobase/os/os0file.c index fa0c266a82a..668d74d75b5 100644 --- a/innobase/os/os0file.c +++ b/innobase/os/os0file.c @@ -11,6 +11,7 @@ Created 10/21/1995 Heikki Tuuri #include "ut0mem.h" #include "srv0srv.h" +#undef HAVE_FDATASYNC #ifdef POSIX_ASYNC_IO /* We assume in this case that the OS has standard Posix aio (at least SunOS @@ -562,6 +563,11 @@ os_file_flush( return(TRUE); } + fprintf(stderr, + "InnoDB: Error: the OS said file flush did not succeed\n"); + + os_file_handle_error(file, NULL); + return(FALSE); #endif } diff --git a/innobase/row/row0mysql.c b/innobase/row/row0mysql.c index b40e026c675..8e1a584f667 100644 --- a/innobase/row/row0mysql.c +++ b/innobase/row/row0mysql.c @@ -24,6 +24,7 @@ Created 9/17/2000 Heikki Tuuri #include "trx0roll.h" #include "trx0purge.h" #include "lock0lock.h" +#include "rem0cmp.h" /*********************************************************************** Reads a MySQL format variable-length field (like VARCHAR) length and @@ -823,7 +824,11 @@ row_create_table_for_mysql( } else { assert(err == DB_DUPLICATE_KEY); fprintf(stderr, - "Innobase: error: table %s already exists in Innobase data dictionary\n", + "InnoDB: Error: table %s already exists in InnoDB internal\n" + "InnoDB: data dictionary. Have you deleted the .frm file\n" + "InnoDB: and not used DROP TABLE? 
Have you used DROP DATABASE\n" + "InnoDB: for InnoDB tables in MySQL version <= 3.23.39?\n" + "InnoDB: See the Restrictions section of the InnoDB manual.\n", table->name); } diff --git a/innobase/row/row0umod.c b/innobase/row/row0umod.c index 70cf0fe5a32..c8db428bade 100644 --- a/innobase/row/row0umod.c +++ b/innobase/row/row0umod.c @@ -361,6 +361,7 @@ row_undo_mod_del_unmark_sec( btr_cur_t* btr_cur; ulint err; ibool found; + char* err_buf; UT_NOT_USED(node); @@ -369,13 +370,31 @@ row_undo_mod_del_unmark_sec( found = row_search_index_entry(index, entry, BTR_MODIFY_LEAF, &pcur, &mtr); - ut_a(found); + if (!found) { + err_buf = mem_alloc(1000); + dtuple_sprintf(err_buf, 900, entry); - btr_cur = btr_pcur_get_btr_cur(&pcur); + fprintf(stderr, "InnoDB: error in sec index entry del undo in\n" + "InnoDB: index %s table %s\n", index->name, + index->table->name); + fprintf(stderr, "InnoDB: tuple %s\n", err_buf); + + rec_sprintf(err_buf, 900, btr_pcur_get_rec(&pcur)); + fprintf(stderr, "InnoDB: record %s\n", err_buf); + + fprintf(stderr, "InnoDB: Make a detailed bug report and send it\n"); + fprintf(stderr, "InnoDB: to mysql@lists.mysql.com\n"); + + mem_free(err_buf); - err = btr_cur_del_mark_set_sec_rec(BTR_NO_LOCKING_FLAG, + } else { + + btr_cur = btr_pcur_get_btr_cur(&pcur); + + err = btr_cur_del_mark_set_sec_rec(BTR_NO_LOCKING_FLAG, btr_cur, FALSE, thr, &mtr); - ut_ad(err == DB_SUCCESS); + ut_ad(err == DB_SUCCESS); + } btr_pcur_close(&pcur); mtr_commit(&mtr); diff --git a/innobase/row/row0upd.c b/innobase/row/row0upd.c index 10dd64b8b1a..5bca2a24c01 100644 --- a/innobase/row/row0upd.c +++ b/innobase/row/row0upd.c @@ -750,6 +750,7 @@ row_upd_sec_index_entry( btr_cur_t* btr_cur; mem_heap_t* heap; rec_t* rec; + char* err_buf; ulint err = DB_SUCCESS; index = node->index; @@ -764,18 +765,37 @@ row_upd_sec_index_entry( found = row_search_index_entry(index, entry, BTR_MODIFY_LEAF, &pcur, &mtr); - ut_ad(found); - btr_cur = btr_pcur_get_btr_cur(&pcur); rec = btr_cur_get_rec(btr_cur); - /* Delete mark the old index record; it can already be delete marked if - we return after a lock wait in row_ins_index_entry below */ + if (!found) { + + err_buf = mem_alloc(1000); + dtuple_sprintf(err_buf, 900, entry); + + fprintf(stderr, "InnoDB: error in sec index entry update in\n" + "InnoDB: index %s table %s\n", index->name, + index->table->name); + fprintf(stderr, "InnoDB: tuple %s\n", err_buf); + + rec_sprintf(err_buf, 900, rec); + fprintf(stderr, "InnoDB: record %s\n", err_buf); + + fprintf(stderr, "InnoDB: Make a detailed bug report and send it\n"); + fprintf(stderr, "InnoDB: to mysql@lists.mysql.com\n"); + + mem_free(err_buf); + } else { + + /* Delete mark the old index record; it can already be + delete marked if we return after a lock wait in + row_ins_index_entry below */ - if (!rec_get_deleted_flag(rec)) { + if (!rec_get_deleted_flag(rec)) { err = btr_cur_del_mark_set_sec_rec(0, btr_cur, TRUE, thr, &mtr); + } } btr_pcur_close(&pcur); diff --git a/innobase/srv/srv0start.c b/innobase/srv/srv0start.c index b584b663e43..80fafa37adf 100644 --- a/innobase/srv/srv0start.c +++ b/innobase/srv/srv0start.c @@ -56,6 +56,7 @@ Created 2/16/1996 Heikki Tuuri #include "srv0start.h" #include "que0que.h" +ibool srv_startup_is_before_trx_rollback_phase = FALSE; ibool srv_is_being_started = FALSE; ibool srv_was_started = FALSE; @@ -531,6 +532,7 @@ innobase_start_or_create_for_mysql(void) /* yydebug = TRUE; */ srv_is_being_started = TRUE; + srv_startup_is_before_trx_rollback_phase = TRUE; if (0 == 
ut_strcmp(srv_unix_file_flush_method_str, "fdatasync")) { srv_unix_file_flush_method = SRV_UNIX_FDATASYNC; @@ -548,6 +550,9 @@ innobase_start_or_create_for_mysql(void) return(DB_ERROR); } + /* + printf("srv_unix set to %lu\n", srv_unix_file_flush_method); + */ os_aio_use_native_aio = srv_use_native_aio; err = srv_boot(); @@ -728,6 +733,7 @@ innobase_start_or_create_for_mysql(void) trx_sys_create(); dict_create(); + srv_startup_is_before_trx_rollback_phase = FALSE; } else if (srv_archive_recovery) { fprintf(stderr, @@ -742,9 +748,15 @@ innobase_start_or_create_for_mysql(void) return(DB_ERROR); } - trx_sys_init_at_db_start(); + /* Since ibuf init is in dict_boot, and ibuf is needed + in any disk i/o, first call dict_boot */ + dict_boot(); + + trx_sys_init_at_db_start(); + srv_startup_is_before_trx_rollback_phase = FALSE; + recv_recovery_from_archive_finish(); } else { /* We always try to do a recovery, even if the database had @@ -759,12 +771,15 @@ innobase_start_or_create_for_mysql(void) return(DB_ERROR); } - trx_sys_init_at_db_start(); + /* Since ibuf init is in dict_boot, and ibuf is needed + in any disk i/o, first call dict_boot */ dict_boot(); + trx_sys_init_at_db_start(); /* The following needs trx lists which are initialized in trx_sys_init_at_db_start */ - + + srv_startup_is_before_trx_rollback_phase = FALSE; recv_recovery_from_checkpoint_finish(); } diff --git a/innobase/sync/sync0sync.c b/innobase/sync/sync0sync.c index 7153355d2a9..c3a1ac3b47f 100644 --- a/innobase/sync/sync0sync.c +++ b/innobase/sync/sync0sync.c @@ -166,6 +166,46 @@ struct sync_level_struct{ ulint level; /* level of the latch in the latching order */ }; + +#if defined(__GNUC__) && defined(UNIV_INTEL_X86) + +ulint +sync_gnuc_intelx86_test_and_set( + /* out: old value of the lock word */ + ulint* lw) /* in: pointer to the lock word */ +{ + ulint res; + + /* In assembly we use the so-called AT & T syntax where + the order of operands is inverted compared to the ordinary Intel + syntax. The 'l' after the mnemonics denotes a 32-bit operation. + The line after the code tells which values come out of the asm + code, and the second line tells the input to the asm code. */ + + asm volatile("movl $1, %%eax; xchgl (%%ecx), %%eax" : + "=eax" (res), "=m" (*lw) : + "ecx" (lw)); + return(res); +} + +void +sync_gnuc_intelx86_reset( + ulint* lw) /* in: pointer to the lock word */ +{ + /* In assembly we use the so-called AT & T syntax where + the order of operands is inverted compared to the ordinary Intel + syntax. The 'l' after the mnemonics denotes a 32-bit operation. */ + + asm volatile("movl $0, %%eax; xchgl (%%ecx), %%eax" : + "=m" (*lw) : + "ecx" (lw) : + "eax"); /* gcc does not seem to understand + that our asm code resets eax: tell it + explicitly that after the third ':' */ +} + +#endif + /********************************************************************** Creates, or rather, initializes a mutex object in a specified memory location (which must be appropriately aligned). 
The mutex is initialized diff --git a/innobase/trx/trx0undo.c b/innobase/trx/trx0undo.c index efee02c4cad..1f408428582 100644 --- a/innobase/trx/trx0undo.c +++ b/innobase/trx/trx0undo.c @@ -613,6 +613,10 @@ trx_undo_insert_header_reuse( /* Insert undo data is not needed after commit: we may free all the space on the page */ + ut_a(mach_read_from_2(undo_page + TRX_UNDO_PAGE_HDR + + TRX_UNDO_PAGE_TYPE) + == TRX_UNDO_INSERT); + mach_write_to_2(page_hdr + TRX_UNDO_PAGE_START, new_free); mach_write_to_2(page_hdr + TRX_UNDO_PAGE_FREE, new_free); @@ -800,7 +804,7 @@ trx_undo_free_page( ulint hist_size; UT_NOT_USED(hdr_offset); - ut_ad(hdr_page_no != page_no); + ut_a(hdr_page_no != page_no); ut_ad(!mutex_own(&kernel_mutex)); ut_ad(mutex_own(&(rseg->mutex))); @@ -1411,6 +1415,10 @@ trx_undo_reuse_cached( if (type == TRX_UNDO_INSERT) { offset = trx_undo_insert_header_reuse(undo_page, trx_id, mtr); } else { + ut_a(mach_read_from_2(undo_page + TRX_UNDO_PAGE_HDR + + TRX_UNDO_PAGE_TYPE) + == TRX_UNDO_UPDATE); + offset = trx_undo_header_create(undo_page, trx_id, mtr); } diff --git a/myisam/myisamchk.c b/myisam/myisamchk.c index 06d34c10659..dd23e214ccf 100644 --- a/myisam/myisamchk.c +++ b/myisam/myisamchk.c @@ -201,7 +201,7 @@ static struct option long_options[] = static void print_version(void) { - printf("%s Ver 1.46 for %s at %s\n",my_progname,SYSTEM_TYPE, + printf("%s Ver 1.47 for %s at %s\n",my_progname,SYSTEM_TYPE, MACHINE_TYPE); } @@ -234,7 +234,8 @@ static void usage(void) -F, --fast Check only tables that hasn't been closed properly\n\ -C, --check-only-changed\n\ Check only tables that has changed since last check\n\ - -f, --force Restart with -r if there are any errors in the table\n\ + -f, --force Restart with -r if there are any errors in the table.\n\ + States will be updated as with --update-state\n\ -i, --information Print statistics information about table that is checked\n\ -m, --medium-check Faster than extended-check, but only finds 99.99% of\n\ all errors. 
Should be good enough for most cases\n\ @@ -359,7 +360,7 @@ static void get_options(register int *argc,register char ***argv) break; case 'f': check_param.tmpfile_createflag= O_RDWR | O_TRUNC; - check_param.testflag|=T_FORCE_CREATE; + check_param.testflag|= T_FORCE_CREATE | T_UPDATE_STATE; break; case 'F': check_param.testflag|=T_FAST; diff --git a/mysql-test/r/bdb.result b/mysql-test/r/bdb.result index 5e227313e4a..c88b7375aec 100644 --- a/mysql-test/r/bdb.result +++ b/mysql-test/r/bdb.result @@ -509,3 +509,5 @@ id id3 1 1 2 2 100 2 +KINMU_DATE +KINMU_DATE diff --git a/mysql-test/r/bigint.result b/mysql-test/r/bigint.result index 08e21b279bc..46ce0fda2c1 100644 --- a/mysql-test/r/bigint.result +++ b/mysql-test/r/bigint.result @@ -5,5 +5,11 @@ +9999999999999999999 -9999999999999999999 10000000000000000000 -10000000000000000000 a +18446744073709551614 18446744073709551615 +a +18446744073709551615 +a 18446744073709551615 +a +18446744073709551614 diff --git a/mysql-test/r/count_distinct.result b/mysql-test/r/count_distinct.result index 6fc10f590ec..97d7b57f249 100644 --- a/mysql-test/r/count_distinct.result +++ b/mysql-test/r/count_distinct.result @@ -7,3 +7,5 @@ isbn city libname a isbn city libname a 007 Berkeley Berkeley Public1 2 000 New York New York Public Libra 2 +f1 count(distinct t2.f2) count(distinct 1,NULL) +1 0 0 diff --git a/mysql-test/r/drop.result b/mysql-test/r/drop.result index 67923fe903c..741fc6bba85 100644 --- a/mysql-test/r/drop.result +++ b/mysql-test/r/drop.result @@ -1,2 +1,11 @@ n 1 +n +4 +Database +foo +mysql +test +Database +mysql +test diff --git a/mysql-test/r/flush.result b/mysql-test/r/flush.result index e34f4f67195..fca84de710c 100644 --- a/mysql-test/r/flush.result +++ b/mysql-test/r/flush.result @@ -1,4 +1,6 @@ n 3 n +23 +n 345 diff --git a/mysql-test/r/show_check.result b/mysql-test/r/show_check.result index ce2e5d4f58d..099ea2fa109 100644 --- a/mysql-test/r/show_check.result +++ b/mysql-test/r/show_check.result @@ -80,3 +80,13 @@ t1 CREATE TABLE `t1` ( `test_set` set('val1','val2','val3') NOT NULL default '', `name` char(20) default 'O''Brien' ) TYPE=MyISAM COMMENT='it''s a table' +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) NOT NULL default '0', + UNIQUE KEY `aa` (`a`) +) TYPE=MyISAM +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) NOT NULL default '0', + PRIMARY KEY (`a`) +) TYPE=MyISAM diff --git a/mysql-test/t/bdb.test b/mysql-test/t/bdb.test index 96296e238fd..c61a6c7197d 100644 --- a/mysql-test/t/bdb.test +++ b/mysql-test/t/bdb.test @@ -705,3 +705,13 @@ commit; select id,id3 from t1; UNLOCK TABLES; DROP TABLE t1; + +# +# Test with empty tables (crashed with lock error) +# + +CREATE TABLE t1 (SYAIN_NO char(5) NOT NULL default '', KINMU_DATE char(6) NOT NULL default '', PRIMARY KEY (SYAIN_NO,KINMU_DATE)) TYPE=BerkeleyDB; +CREATE TABLE t2 ( SYAIN_NO char(5) NOT NULL default '',STR_DATE char(8) NOT NULL default '',PRIMARY KEY (SYAIN_NO,STR_DATE) ) TYPE=BerkeleyDB; +select T1.KINMU_DATE from t1 T1 ,t2 T2 where T1.SYAIN_NO = '12345' and T1.KINMU_DATE = '200106' and T2.SYAIN_NO = T1.SYAIN_NO; +select T1.KINMU_DATE from t1 T1 ,t2 T2 where T1.SYAIN_NO = '12345' and T1.KINMU_DATE = '200106' and T2.SYAIN_NO = T1.SYAIN_NO; +DROP TABLE t1,t2; diff --git a/mysql-test/t/bigint.test b/mysql-test/t/bigint.test index 9a819463f3f..6470b6f6a30 100644 --- a/mysql-test/t/bigint.test +++ b/mysql-test/t/bigint.test @@ -6,7 +6,11 @@ select 9223372036854775807,-009223372036854775808; select +9999999999999999999,-9999999999999999999; drop table if exists 
t1; -create table t1 (a bigint unsigned); -insert into t1 values (18446744073709551615), (0xFFFFFFFFFFFFFFFF); +create table t1 (a bigint unsigned not null, primary key(a)); +insert into t1 values (18446744073709551615), (0xFFFFFFFFFFFFFFFE); +select * from t1; +select * from t1 where a=18446744073709551615; +select * from t1 where a='18446744073709551615'; +delete from t1 where a=18446744073709551615; select * from t1; drop table t1; diff --git a/mysql-test/t/count_distinct.test b/mysql-test/t/count_distinct.test index 1afb548c2ad..3d795d44821 100644 --- a/mysql-test/t/count_distinct.test +++ b/mysql-test/t/count_distinct.test @@ -32,3 +32,13 @@ insert into t1 values ('NYC Lib','New York'); select t2.isbn,city,t1.libname,count(t1.libname) as a from t3 left join t1 on t3.libname=t1.libname left join t2 on t3.isbn=t2.isbn group by city,t1.libname; select t2.isbn,city,t1.libname,count(distinct t1.libname) as a from t3 left join t1 on t3.libname=t1.libname left join t2 on t3.isbn=t2.isbn group by city having count(distinct t1.libname) > 1; drop table t1, t2, t3; + +# +# Problem with LEFT JOIN +# + +create table t1 (f1 int); +insert into t1 values (1); +create table t2 (f1 int,f2 int); +select t1.f1,count(distinct t2.f2),count(distinct 1,NULL) from t1 left join t2 on t1.f1=t2.f1 group by t1.f1; +drop table t1,t2; diff --git a/mysql-test/t/drop.test b/mysql-test/t/drop.test index 1de387f6e4c..2a45fe8253b 100644 --- a/mysql-test/t/drop.test +++ b/mysql-test/t/drop.test @@ -10,3 +10,34 @@ insert into t1 values(2); create table t1(n int); drop table t1; select * from t1; + +#now test for a bug in drop database - it is important that the name +#of the table is the same as the name of the database - in the original +#code this triggered a bug +drop database if exists foo; +create database foo; +drop database if exists foo; +create database foo; +create table foo.foo (n int); +insert into foo.foo values (4); +select * from foo.foo; +drop database if exists foo; +create database foo; +drop database foo; + +# test drop/create database and FLUSH TABLES WITH READ LOCK +drop database if exists foo; +flush tables with read lock; +--error 1209 +create database foo; +unlock tables; +create database foo; +show databases; +flush tables with read lock; +--error 1208 +drop database foo; +unlock tables; +drop database foo; +show databases; + + diff --git a/mysql-test/t/flush.test b/mysql-test/t/flush.test index b7293307605..4491de1f82b 100644 --- a/mysql-test/t/flush.test +++ b/mysql-test/t/flush.test @@ -33,6 +33,21 @@ unlock tables; connection con1; reap; +#test if drop database will wait until we release the global read lock +connection con1; +drop database if exists foo; +create database foo; +create table foo.t1(n int); +insert into foo.t1 values (23); +flush tables with read lock; +connection con2; +send drop database foo; +connection con1; +select * from foo.t1; +unlock tables; +connection con2; +reap; + # test if dirty close releases global read lock connection con1; create table t1 (n int); diff --git a/mysql-test/t/show_check.test b/mysql-test/t/show_check.test index f4f58c8c885..476d8dcdf0d 100644 --- a/mysql-test/t/show_check.test +++ b/mysql-test/t/show_check.test @@ -65,3 +65,10 @@ create table t1 ( ) comment = 'it\'s a table' ; show create table t1 ; drop table t1; + +create table t1 (a int not null, unique aa (a)); +show create table t1; +drop table t1; +create table t1 (a int not null, primary key (a)); +show create table t1; +drop table t1; diff --git a/mysql-test/t/type_float.test 
b/mysql-test/t/type_float.test index 1496170a256..23941ad2913 100644 --- a/mysql-test/t/type_float.test +++ b/mysql-test/t/type_float.test @@ -3,7 +3,8 @@ # Numeric floating point. SELECT 10,10.0,10.,.1e+2,100.0e-1; -select 6e-05, -6e-05, --6e-05, -6e-05+1.000000; +SELECT 6e-05, -6e-05, --6e-05, -6e-05+1.000000; +SELECT 1e1,1.e1,1.0e1,1e+1,1.e+1,1.0e+1,1e-1,1.e-1,1.0e-1; drop table if exists t1; create table t1 (f1 float(24),f2 float(52)); diff --git a/sql-bench/Comments/postgres.benchmark index b25a9931f9e..c52a53699e0 100644 --- a/sql-bench/Comments/postgres.benchmark +++ b/sql-bench/Comments/postgres.benchmark @@ -11,24 +11,26 @@ # Another time vacuum() filled our system disk, which had 6G free, # while vacuuming a table of 60 M. # -# We have sent a mail about this to the PostgreSQL mailing list, so -# the PostgreSQL developers should be aware of these problems and should -# hopefully fix this soon. -# # WARNING # The test was run on an Intel Xeon 2x 550 MHz machine with 1G memory, -# 9G hard disk. The OS is Suse 7.1, with Linux 2.4.0 compiled with SMP +# 9G hard disk. The OS is Suse 7.1, with Linux 2.4.2 compiled with SMP # support # Both the perl client and the database server are run # on the same machine. No other cpu intensive process was used during # the benchmark. +# +# During the test we ran PostgreSQL with -o -F, i.e. no-fsync mode (not ACID safe), +# because when we started postmaster without -o -F, PostgreSQL log files +# filled up a 9G disk until postmaster crashed. +# We did however notice that without -o -F, PostgreSQL was a magnitude slower +# than when using -o -F. -# First, install postgresql-7.1.1.tar.gz +# +# First, install postgresql-7.1.2.tar.gz # Add the following lines to your ~/.bash_profile or # corresponding file. If you are using csh, use 'setenv'.
-# export POSTGRES_INCLUDE=/usr/local/pg/include export POSTGRES_LIB=/usr/local/pg/lib @@ -62,7 +64,7 @@ su - postgres exit # -# Second, install packages DBD-Pg-1.00.tar.gz and DBI-1.14.tar.gz, +# Second, install packages DBD-Pg-1.00.tar.gz and DBI-1.18.tar.gz, # available from http://www.perl.com/CPAN/ export POSTGRES_LIB=/usr/local/pg/lib/ @@ -82,6 +84,7 @@ run-all-tests --comment="Intel Xeon, 2x550 Mhz, 512M, pg started with -o -F" --u # When running with --fast we run the following vacuum commands on # the database between each major update of the tables: +# vacuum anlyze table # vacuum table # or # vacuum analyze diff --git a/sql-bench/Results/ATIS-mysql-Linux_2.4.0_64GB_SMP_i686-cmp-mysql,pg b/sql-bench/Results/ATIS-mysql-Linux_2.4.0_64GB_SMP_i686-cmp-mysql,pg deleted file mode 100644 index 84ff70751d3..00000000000 --- a/sql-bench/Results/ATIS-mysql-Linux_2.4.0_64GB_SMP_i686-cmp-mysql,pg +++ /dev/null @@ -1,20 +0,0 @@ -Testing server 'MySQL 3.23.39' at 2001-06-05 19:26:17 - -ATIS table test - -Creating tables -Time for create_table (28): 0 wallclock secs ( 0.01 usr 0.00 sys + 0.00 cusr 0.00 csys = 0.01 CPU) - -Inserting data -Time to insert (9768): 3 wallclock secs ( 0.45 usr 0.44 sys + 0.00 cusr 0.00 csys = 0.89 CPU) - -Retrieving data -Time for select_simple_join (500): 3 wallclock secs ( 0.68 usr 0.19 sys + 0.00 cusr 0.00 csys = 0.87 CPU) -Time for select_join (100): 3 wallclock secs ( 0.51 usr 0.20 sys + 0.00 cusr 0.00 csys = 0.71 CPU) -Time for select_key_prefix_join (100): 13 wallclock secs ( 4.08 usr 2.01 sys + 0.00 cusr 0.00 csys = 6.09 CPU) -Time for select_distinct (800): 15 wallclock secs ( 1.75 usr 0.69 sys + 0.00 cusr 0.00 csys = 2.44 CPU) -Time for select_group (2600): 20 wallclock secs ( 1.57 usr 0.41 sys + 0.00 cusr 0.00 csys = 1.98 CPU) - -Removing tables -Time to drop_table (28): 0 wallclock secs ( 0.00 usr 0.00 sys + 0.00 cusr 0.00 csys = 0.00 CPU) -Total time: 57 wallclock secs ( 9.06 usr 3.94 sys + 0.00 cusr 0.00 csys = 13.00 CPU) diff --git a/sql-bench/Results/RUN-mysql-Linux_2.4.0_64GB_SMP_i686-cmp-mysql,pg b/sql-bench/Results/RUN-mysql-Linux_2.4.0_64GB_SMP_i686-cmp-mysql,pg deleted file mode 100644 index b7112793495..00000000000 --- a/sql-bench/Results/RUN-mysql-Linux_2.4.0_64GB_SMP_i686-cmp-mysql,pg +++ /dev/null @@ -1,101 +0,0 @@ -Benchmark DBD suite: 2.12 -Date of test: 2001-06-05 19:27:31 -Running tests on: Linux 2.4.0-64GB-SMP i686 -Arguments: -Comments: Intel Xeon, 2x550 Mhz, 512M, key_buffer=16M -Limits from: mysql,pg -Server version: MySQL 3.23.39 - -ATIS: Total time: 57 wallclock secs ( 9.06 usr 3.94 sys + 0.00 cusr 0.00 csys = 13.00 CPU) -alter-table: Total time: 271 wallclock secs ( 0.18 usr 0.02 sys + 0.00 cusr 0.00 csys = 0.20 CPU) -big-tables: Total time: 33 wallclock secs ( 9.40 usr 7.64 sys + 0.00 cusr 0.00 csys = 17.04 CPU) -connect: Total time: 86 wallclock secs (33.98 usr 18.10 sys + 0.00 cusr 0.00 csys = 52.08 CPU) -create: Total time: 103 wallclock secs ( 7.83 usr 3.60 sys + 0.00 cusr 0.00 csys = 11.43 CPU) -insert: Total time: 2736 wallclock secs (661.21 usr 182.47 sys + 0.00 cusr 0.00 csys = 843.68 CPU) -select: Total time: 1949 wallclock secs (70.03 usr 16.42 sys + 0.00 cusr 0.00 csys = 86.45 CPU) -wisconsin: Total time: 19 wallclock secs ( 3.92 usr 1.70 sys + 0.00 cusr 0.00 csys = 5.62 CPU) - -All 8 test executed successfully - -Totals per operation: -Operation seconds usr sys cpu tests -alter_table_add 261.00 0.13 0.02 0.15 992 -connect 16.00 6.84 2.50 9.34 10000 -connect+select_1_row 15.00 7.11 3.70 10.81 10000 -connect+select_simple 
13.00 6.70 3.21 9.91 10000 -count 45.00 0.01 0.00 0.01 100 -count_distinct 60.00 0.42 0.08 0.50 1000 -count_distinct_2 63.00 0.18 0.03 0.21 1000 -count_distinct_big 165.00 7.78 3.16 10.94 120 -count_distinct_group 194.00 1.21 0.37 1.58 1000 -count_distinct_group_on_key 59.00 0.51 0.07 0.58 1000 -count_distinct_group_on_key_parts 194.00 1.12 0.46 1.58 1000 -count_distinct_key_prefix 51.00 0.45 0.08 0.53 1000 -count_group_on_key_parts 58.00 1.16 0.35 1.51 1000 -count_on_key 586.00 16.61 2.71 19.32 50100 -create+drop 33.00 2.94 0.82 3.76 10000 -create_MANY_tables 18.00 1.02 0.62 1.64 5000 -create_index 5.00 0.00 0.00 0.00 8 -create_key+drop 41.00 3.05 0.66 3.71 10000 -create_table 0.00 0.01 0.00 0.01 31 -delete_all 17.00 0.00 0.00 0.00 12 -delete_all_many_keys 75.00 0.03 0.00 0.03 1 -delete_big 1.00 0.00 0.00 0.00 1 -delete_big_many_keys 75.00 0.03 0.00 0.03 128 -delete_key 4.00 0.76 0.29 1.05 10000 -drop_index 5.00 0.00 0.00 0.00 8 -drop_table 0.00 0.00 0.00 0.00 28 -drop_table_when_MANY_tables 6.00 0.37 0.63 1.00 5000 -insert 144.00 24.06 14.28 38.34 350768 -insert_duplicates 31.00 5.06 3.72 8.78 100000 -insert_key 137.00 9.91 6.26 16.17 100000 -insert_many_fields 10.00 0.54 0.08 0.62 2000 -insert_select_1_key 7.00 0.00 0.00 0.00 1 -insert_select_2_keys 9.00 0.00 0.00 0.00 1 -min_max 30.00 0.04 0.01 0.05 60 -min_max_on_key 230.00 28.28 4.43 32.71 85000 -order_by_big 78.00 22.39 9.83 32.22 10 -order_by_big_key 33.00 23.35 10.15 33.50 10 -order_by_big_key2 32.00 22.53 9.81 32.34 10 -order_by_big_key_desc 36.00 23.47 10.27 33.74 10 -order_by_big_key_diff 74.00 22.66 9.76 32.42 10 -order_by_big_key_prefix 33.00 22.18 9.81 31.99 10 -order_by_key2_diff 9.00 1.30 0.85 2.15 500 -order_by_key_prefix 4.00 0.97 0.57 1.54 500 -order_by_range 8.00 1.26 0.49 1.75 500 -outer_join 110.00 0.00 0.00 0.00 10 -outer_join_found 107.00 0.00 0.00 0.00 10 -outer_join_not_found 59.00 0.00 0.00 0.00 500 -outer_join_on_key 60.00 0.00 0.00 0.00 10 -select_1_row 3.00 0.81 0.69 1.50 10000 -select_2_rows 3.00 0.67 0.63 1.30 10000 -select_big 63.00 32.72 16.55 49.27 10080 -select_column+column 4.00 0.52 0.46 0.98 10000 -select_diff_key 193.00 0.32 0.04 0.36 500 -select_distinct 15.00 1.75 0.69 2.44 800 -select_group 75.00 1.59 0.45 2.04 2711 -select_group_when_MANY_tables 5.00 0.43 0.87 1.30 5000 -select_join 3.00 0.51 0.20 0.71 100 -select_key 132.00 53.98 10.53 64.51 200000 -select_key2 139.00 78.61 11.08 89.69 200000 -select_key2_return_key 131.00 64.58 9.61 74.19 200000 -select_key2_return_prim 134.00 72.33 11.34 83.67 200000 -select_key_prefix 141.00 86.32 12.05 98.37 200000 -select_key_prefix_join 13.00 4.08 2.01 6.09 100 -select_key_return_key 125.00 59.92 12.00 71.92 200000 -select_many_fields 23.00 8.85 7.55 16.40 2000 -select_query_cache 120.00 3.67 0.53 4.20 10000 -select_query_cache2 120.00 3.80 0.57 4.37 10000 -select_range 201.00 9.05 3.95 13.00 410 -select_range_key2 21.00 7.15 1.40 8.55 25010 -select_range_prefix 22.00 6.55 1.40 7.95 25010 -select_simple 2.00 0.54 0.49 1.03 10000 -select_simple_join 3.00 0.68 0.19 0.87 500 -update_big 64.00 0.00 0.00 0.00 10 -update_of_key 25.00 2.62 1.44 4.06 50000 -update_of_key_big 35.00 0.05 0.04 0.09 501 -update_of_primary_key_many_keys 47.00 0.01 0.02 0.03 256 -update_with_key 119.00 18.44 12.64 31.08 300000 -update_with_key_prefix 36.00 6.23 3.85 10.08 100000 -wisc_benchmark 5.00 2.33 0.52 2.85 114 -TOTALS 5323.00 795.55 233.87 1029.42 2551551 diff --git a/sql-bench/Results/alter-table-mysql-Linux_2.4.0_64GB_SMP_i686-cmp-mysql,pg 
b/sql-bench/Results/alter-table-mysql-Linux_2.4.0_64GB_SMP_i686-cmp-mysql,pg deleted file mode 100644 index bd9506b44c2..00000000000 --- a/sql-bench/Results/alter-table-mysql-Linux_2.4.0_64GB_SMP_i686-cmp-mysql,pg +++ /dev/null @@ -1,14 +0,0 @@ -Testing server 'MySQL 3.23.39' at 2001-06-05 13:47:22 - -Testing of ALTER TABLE -Testing with 1000 columns and 1000 rows in 20 steps -Insert data into the table -Time for insert (1000) 0 wallclock secs ( 0.05 usr 0.00 sys + 0.00 cusr 0.00 csys = 0.05 CPU) - -Time for alter_table_add (992): 261 wallclock secs ( 0.13 usr 0.02 sys + 0.00 cusr 0.00 csys = 0.15 CPU) - -Time for create_index (8): 5 wallclock secs ( 0.00 usr 0.00 sys + 0.00 cusr 0.00 csys = 0.00 CPU) - -Time for drop_index (8): 5 wallclock secs ( 0.00 usr 0.00 sys + 0.00 cusr 0.00 csys = 0.00 CPU) - -Total time: 271 wallclock secs ( 0.18 usr 0.02 sys + 0.00 cusr 0.00 csys = 0.20 CPU) diff --git a/sql-bench/Results/big-tables-mysql-Linux_2.4.0_64GB_SMP_i686-cmp-mysql,pg b/sql-bench/Results/big-tables-mysql-Linux_2.4.0_64GB_SMP_i686-cmp-mysql,pg deleted file mode 100644 index ff6f41e1bad..00000000000 --- a/sql-bench/Results/big-tables-mysql-Linux_2.4.0_64GB_SMP_i686-cmp-mysql,pg +++ /dev/null @@ -1,19 +0,0 @@ -Testing server 'MySQL 3.23.39' at 2001-06-05 13:51:53 - -Testing of some unusual tables -All tests are done 1000 times with 1000 fields - -Testing table with 1000 fields -Testing select * from table with 1 record -Time to select_many_fields(1000): 10 wallclock secs ( 4.43 usr 4.17 sys + 0.00 cusr 0.00 csys = 8.60 CPU) - -Testing select all_fields from table with 1 record -Time to select_many_fields(1000): 13 wallclock secs ( 4.42 usr 3.38 sys + 0.00 cusr 0.00 csys = 7.80 CPU) - -Testing insert VALUES() -Time to insert_many_fields(1000): 3 wallclock secs ( 0.46 usr 0.03 sys + 0.00 cusr 0.00 csys = 0.49 CPU) - -Testing insert (all_fields) VALUES() -Time to insert_many_fields(1000): 7 wallclock secs ( 0.08 usr 0.05 sys + 0.00 cusr 0.00 csys = 0.13 CPU) - -Total time: 33 wallclock secs ( 9.40 usr 7.64 sys + 0.00 cusr 0.00 csys = 17.04 CPU) diff --git a/sql-bench/Results/connect-mysql-Linux_2.4.0_64GB_SMP_i686-cmp-mysql,pg b/sql-bench/Results/connect-mysql-Linux_2.4.0_64GB_SMP_i686-cmp-mysql,pg deleted file mode 100644 index 29c5ea5e2de..00000000000 --- a/sql-bench/Results/connect-mysql-Linux_2.4.0_64GB_SMP_i686-cmp-mysql,pg +++ /dev/null @@ -1,30 +0,0 @@ -Testing server 'MySQL 3.23.39' at 2001-06-05 13:52:26 - -Testing the speed of connecting to the server and sending of data -All tests are done 10000 times - -Testing connection/disconnect -Time to connect (10000): 16 wallclock secs ( 6.84 usr 2.50 sys + 0.00 cusr 0.00 csys = 9.34 CPU) - -Test connect/simple select/disconnect -Time for connect+select_simple (10000): 13 wallclock secs ( 6.70 usr 3.21 sys + 0.00 cusr 0.00 csys = 9.91 CPU) - -Test simple select -Time for select_simple (10000): 2 wallclock secs ( 0.54 usr 0.49 sys + 0.00 cusr 0.00 csys = 1.03 CPU) - -Testing connect/select 1 row from table/disconnect -Time to connect+select_1_row (10000): 15 wallclock secs ( 7.11 usr 3.70 sys + 0.00 cusr 0.00 csys = 10.81 CPU) - -Testing select 1 row from table -Time to select_1_row (10000): 3 wallclock secs ( 0.81 usr 0.69 sys + 0.00 cusr 0.00 csys = 1.50 CPU) - -Testing select 2 rows from table -Time to select_2_rows (10000): 3 wallclock secs ( 0.67 usr 0.63 sys + 0.00 cusr 0.00 csys = 1.30 CPU) - -Test select with aritmetic (+) -Time for select_column+column (10000): 4 wallclock secs ( 0.52 usr 0.46 sys + 0.00 cusr 0.00 csys = 0.98 CPU) - 
-Testing retrieval of big records (65000 bytes) -Time to select_big (10000): 30 wallclock secs (10.79 usr 6.41 sys + 0.00 cusr 0.00 csys = 17.20 CPU) - -Total time: 86 wallclock secs (33.98 usr 18.10 sys + 0.00 cusr 0.00 csys = 52.08 CPU) diff --git a/sql-bench/Results/create-mysql-Linux_2.4.0_64GB_SMP_i686-cmp-mysql,pg b/sql-bench/Results/create-mysql-Linux_2.4.0_64GB_SMP_i686-cmp-mysql,pg deleted file mode 100644 index 15ffc740a83..00000000000 --- a/sql-bench/Results/create-mysql-Linux_2.4.0_64GB_SMP_i686-cmp-mysql,pg +++ /dev/null @@ -1,18 +0,0 @@ -Testing server 'MySQL 3.23.39' at 2001-06-05 13:53:52 - -Testing the speed of creating and droping tables -Testing with 5000 tables and 10000 loop count - -Testing create of tables -Time for create_MANY_tables (5000): 18 wallclock secs ( 1.02 usr 0.62 sys + 0.00 cusr 0.00 csys = 1.64 CPU) - -Accessing tables -Time to select_group_when_MANY_tables (5000): 5 wallclock secs ( 0.43 usr 0.87 sys + 0.00 cusr 0.00 csys = 1.30 CPU) - -Testing drop -Time for drop_table_when_MANY_tables (5000): 6 wallclock secs ( 0.37 usr 0.63 sys + 0.00 cusr 0.00 csys = 1.00 CPU) - -Testing create+drop -Time for create+drop (10000): 33 wallclock secs ( 2.94 usr 0.82 sys + 0.00 cusr 0.00 csys = 3.76 CPU) -Time for create_key+drop (10000): 41 wallclock secs ( 3.05 usr 0.66 sys + 0.00 cusr 0.00 csys = 3.71 CPU) -Total time: 103 wallclock secs ( 7.83 usr 3.60 sys + 0.00 cusr 0.00 csys = 11.43 CPU) diff --git a/sql-bench/Results/insert-mysql-Linux_2.4.0_64GB_SMP_i686-cmp-mysql,pg b/sql-bench/Results/insert-mysql-Linux_2.4.0_64GB_SMP_i686-cmp-mysql,pg deleted file mode 100644 index 9aae3dc3270..00000000000 --- a/sql-bench/Results/insert-mysql-Linux_2.4.0_64GB_SMP_i686-cmp-mysql,pg +++ /dev/null @@ -1,85 +0,0 @@ -Testing server 'MySQL 3.23.39' at 2001-06-05 13:55:36 - -Testing the speed of inserting data into 1 table and do some selects on it. -The tests are done with a table that has 100000 rows. 
- -Generating random keys -Creating tables -Inserting 100000 rows in order -Inserting 100000 rows in reverse order -Inserting 100000 rows in random order -Time for insert (300000): 123 wallclock secs (21.22 usr 12.32 sys + 0.00 cusr 0.00 csys = 33.54 CPU) - -Testing insert of duplicates -Time for insert_duplicates (100000): 31 wallclock secs ( 5.06 usr 3.72 sys + 0.00 cusr 0.00 csys = 8.78 CPU) - -Retrieving data from the table -Time for select_big (10:3000000): 32 wallclock secs (21.78 usr 10.07 sys + 0.00 cusr 0.00 csys = 31.85 CPU) -Time for order_by_big_key (10:3000000): 33 wallclock secs (23.35 usr 10.15 sys + 0.00 cusr 0.00 csys = 33.50 CPU) -Time for order_by_big_key_desc (10:3000000): 36 wallclock secs (23.47 usr 10.27 sys + 0.00 cusr 0.00 csys = 33.74 CPU) -Time for order_by_big_key_prefix (10:3000000): 33 wallclock secs (22.18 usr 9.81 sys + 0.00 cusr 0.00 csys = 31.99 CPU) -Time for order_by_big_key2 (10:3000000): 32 wallclock secs (22.53 usr 9.81 sys + 0.00 cusr 0.00 csys = 32.34 CPU) -Time for order_by_big_key_diff (10:3000000): 74 wallclock secs (22.66 usr 9.76 sys + 0.00 cusr 0.00 csys = 32.42 CPU) -Time for order_by_big (10:3000000): 78 wallclock secs (22.39 usr 9.83 sys + 0.00 cusr 0.00 csys = 32.22 CPU) -Time for order_by_range (500:125750): 8 wallclock secs ( 1.26 usr 0.49 sys + 0.00 cusr 0.00 csys = 1.75 CPU) -Time for order_by_key_prefix (500:125750): 4 wallclock secs ( 0.97 usr 0.57 sys + 0.00 cusr 0.00 csys = 1.54 CPU) -Time for order_by_key2_diff (500:250500): 9 wallclock secs ( 1.30 usr 0.85 sys + 0.00 cusr 0.00 csys = 2.15 CPU) -Time for select_diff_key (500:1000): 193 wallclock secs ( 0.32 usr 0.04 sys + 0.00 cusr 0.00 csys = 0.36 CPU) -Time for select_range_prefix (5010:42084): 13 wallclock secs ( 2.55 usr 0.51 sys + 0.00 cusr 0.00 csys = 3.06 CPU) -Time for select_range_key2 (5010:42084): 12 wallclock secs ( 2.81 usr 0.68 sys + 0.00 cusr 0.00 csys = 3.49 CPU) -Time for select_key_prefix (200000): 141 wallclock secs (86.32 usr 12.05 sys + 0.00 cusr 0.00 csys = 98.37 CPU) -Time for select_key (200000): 132 wallclock secs (53.98 usr 10.53 sys + 0.00 cusr 0.00 csys = 64.51 CPU) -Time for select_key_return_key (200000): 125 wallclock secs (59.92 usr 12.00 sys + 0.00 cusr 0.00 csys = 71.92 CPU) -Time for select_key2 (200000): 139 wallclock secs (78.61 usr 11.08 sys + 0.00 cusr 0.00 csys = 89.69 CPU) -Time for select_key2_return_key (200000): 131 wallclock secs (64.58 usr 9.61 sys + 0.00 cusr 0.00 csys = 74.19 CPU) -Time for select_key2_return_prim (200000): 134 wallclock secs (72.33 usr 11.34 sys + 0.00 cusr 0.00 csys = 83.67 CPU) - -Test of compares with simple ranges -Time for select_range_prefix (20000:43500): 9 wallclock secs ( 4.00 usr 0.89 sys + 0.00 cusr 0.00 csys = 4.89 CPU) -Time for select_range_key2 (20000:43500): 9 wallclock secs ( 4.34 usr 0.72 sys + 0.00 cusr 0.00 csys = 5.06 CPU) -Time for select_group (111): 55 wallclock secs ( 0.02 usr 0.04 sys + 0.00 cusr 0.00 csys = 0.06 CPU) -Time for min_max_on_key (15000): 8 wallclock secs ( 5.12 usr 0.76 sys + 0.00 cusr 0.00 csys = 5.88 CPU) -Time for min_max (60): 30 wallclock secs ( 0.04 usr 0.01 sys + 0.00 cusr 0.00 csys = 0.05 CPU) -Time for count_on_key (100): 52 wallclock secs ( 0.03 usr 0.02 sys + 0.00 cusr 0.00 csys = 0.05 CPU) -Time for count (100): 45 wallclock secs ( 0.01 usr 0.00 sys + 0.00 cusr 0.00 csys = 0.01 CPU) -Time for count_distinct_big (20): 98 wallclock secs ( 0.01 usr 0.00 sys + 0.00 cusr 0.00 csys = 0.01 CPU) - -Testing update of keys with functions -Time for update_of_key (50000): 25 
wallclock secs ( 2.62 usr 1.44 sys + 0.00 cusr 0.00 csys = 4.06 CPU) -Time for update_of_key_big (501): 35 wallclock secs ( 0.05 usr 0.04 sys + 0.00 cusr 0.00 csys = 0.09 CPU) - -Testing update with key -Time for update_with_key (300000): 119 wallclock secs (18.44 usr 12.64 sys + 0.00 cusr 0.00 csys = 31.08 CPU) -Time for update_with_key_prefix (100000): 36 wallclock secs ( 6.23 usr 3.85 sys + 0.00 cusr 0.00 csys = 10.08 CPU) - -Testing update of all rows -Time for update_big (10): 64 wallclock secs ( 0.00 usr 0.00 sys + 0.00 cusr 0.00 csys = 0.00 CPU) - -Testing left outer join -Time for outer_join_on_key (10:10): 60 wallclock secs ( 0.00 usr 0.00 sys + 0.00 cusr 0.00 csys = 0.00 CPU) -Time for outer_join (10:10): 110 wallclock secs ( 0.00 usr 0.00 sys + 0.00 cusr 0.00 csys = 0.00 CPU) -Time for outer_join_found (10:10): 107 wallclock secs ( 0.00 usr 0.00 sys + 0.00 cusr 0.00 csys = 0.00 CPU) -Time for outer_join_not_found (500:10): 59 wallclock secs ( 0.00 usr 0.00 sys + 0.00 cusr 0.00 csys = 0.00 CPU) - -Testing INSERT INTO ... SELECT -Time for insert_select_1_key (1): 7 wallclock secs ( 0.00 usr 0.00 sys + 0.00 cusr 0.00 csys = 0.00 CPU) -Time for insert_select_2_keys (1): 9 wallclock secs ( 0.00 usr 0.00 sys + 0.00 cusr 0.00 csys = 0.00 CPU) -Time for drop table(2): 0 wallclock secs ( 0.00 usr 0.00 sys + 0.00 cusr 0.00 csys = 0.00 CPU) - -Testing delete -Time for delete_key (10000): 4 wallclock secs ( 0.76 usr 0.29 sys + 0.00 cusr 0.00 csys = 1.05 CPU) -Time for delete_all (12): 17 wallclock secs ( 0.00 usr 0.00 sys + 0.00 cusr 0.00 csys = 0.00 CPU) - -Insert into table with 16 keys and with a primary key with 16 parts -Time for insert_key (100000): 137 wallclock secs ( 9.91 usr 6.26 sys + 0.00 cusr 0.00 csys = 16.17 CPU) - -Testing update of keys -Time for update_of_primary_key_many_keys (256): 47 wallclock secs ( 0.01 usr 0.02 sys + 0.00 cusr 0.00 csys = 0.03 CPU) - -Deleting rows from the table -Time for delete_big_many_keys (128): 75 wallclock secs ( 0.03 usr 0.00 sys + 0.00 cusr 0.00 csys = 0.03 CPU) - -Deleting everything from table -Time for delete_all_many_keys (1): 75 wallclock secs ( 0.03 usr 0.00 sys + 0.00 cusr 0.00 csys = 0.03 CPU) - -Total time: 2736 wallclock secs (661.21 usr 182.47 sys + 0.00 cusr 0.00 csys = 843.68 CPU) diff --git a/sql-bench/Results/select-mysql-Linux_2.4.0_64GB_SMP_i686-cmp-mysql,pg b/sql-bench/Results/select-mysql-Linux_2.4.0_64GB_SMP_i686-cmp-mysql,pg deleted file mode 100644 index c224f1e223f..00000000000 --- a/sql-bench/Results/select-mysql-Linux_2.4.0_64GB_SMP_i686-cmp-mysql,pg +++ /dev/null @@ -1,30 +0,0 @@ -Testing server 'MySQL 3.23.39' at 2001-06-05 14:41:13 - -Testing the speed of selecting on keys that consist of many parts -The test-table has 10000 rows and the test is done with 500 ranges. 
- -Creating table -Inserting 10000 rows -Time to insert (10000): 5 wallclock secs ( 0.80 usr 0.34 sys + 0.00 cusr 0.00 csys = 1.14 CPU) - -Test if the database has a query cache -Time for select_query_cache (10000): 120 wallclock secs ( 3.67 usr 0.53 sys + 0.00 cusr 0.00 csys = 4.20 CPU) - -Time for select_query_cache2 (10000): 120 wallclock secs ( 3.80 usr 0.57 sys + 0.00 cusr 0.00 csys = 4.37 CPU) - -Testing big selects on the table -Time for select_big (70:17207): 1 wallclock secs ( 0.15 usr 0.07 sys + 0.00 cusr 0.00 csys = 0.22 CPU) -Time for select_range (410:1057904): 201 wallclock secs ( 9.05 usr 3.95 sys + 0.00 cusr 0.00 csys = 13.00 CPU) -Time for min_max_on_key (70000): 222 wallclock secs (23.16 usr 3.67 sys + 0.00 cusr 0.00 csys = 26.83 CPU) -Time for count_on_key (50000): 534 wallclock secs (16.58 usr 2.69 sys + 0.00 cusr 0.00 csys = 19.27 CPU) - -Time for count_group_on_key_parts (1000:100000): 58 wallclock secs ( 1.16 usr 0.35 sys + 0.00 cusr 0.00 csys = 1.51 CPU) -Testing count(distinct) on the table -Time for count_distinct_key_prefix (1000:1000): 51 wallclock secs ( 0.45 usr 0.08 sys + 0.00 cusr 0.00 csys = 0.53 CPU) -Time for count_distinct (1000:1000): 60 wallclock secs ( 0.42 usr 0.08 sys + 0.00 cusr 0.00 csys = 0.50 CPU) -Time for count_distinct_2 (1000:1000): 63 wallclock secs ( 0.18 usr 0.03 sys + 0.00 cusr 0.00 csys = 0.21 CPU) -Time for count_distinct_group_on_key (1000:6000): 59 wallclock secs ( 0.51 usr 0.07 sys + 0.00 cusr 0.00 csys = 0.58 CPU) -Time for count_distinct_group_on_key_parts (1000:100000): 194 wallclock secs ( 1.12 usr 0.46 sys + 0.00 cusr 0.00 csys = 1.58 CPU) -Time for count_distinct_group (1000:100000): 194 wallclock secs ( 1.21 usr 0.37 sys + 0.00 cusr 0.00 csys = 1.58 CPU) -Time for count_distinct_big (100:1000000): 67 wallclock secs ( 7.77 usr 3.16 sys + 0.00 cusr 0.00 csys = 10.93 CPU) -Total time: 1949 wallclock secs (70.03 usr 16.42 sys + 0.00 cusr 0.00 csys = 86.45 CPU) diff --git a/sql-bench/Results/wisconsin-mysql-Linux_2.4.0_64GB_SMP_i686-cmp-mysql,pg b/sql-bench/Results/wisconsin-mysql-Linux_2.4.0_64GB_SMP_i686-cmp-mysql,pg deleted file mode 100644 index 54a474a53d2..00000000000 --- a/sql-bench/Results/wisconsin-mysql-Linux_2.4.0_64GB_SMP_i686-cmp-mysql,pg +++ /dev/null @@ -1,14 +0,0 @@ -Testing server 'MySQL 3.23.39' at 2001-06-05 15:13:43 - -Wisconsin benchmark test - -Time for create_table (3): 0 wallclock secs ( 0.00 usr 0.00 sys + 0.00 cusr 0.00 csys = 0.00 CPU) - -Inserting data -Time to insert (31000): 13 wallclock secs ( 1.59 usr 1.18 sys + 0.00 cusr 0.00 csys = 2.77 CPU) -Time to delete_big (1): 1 wallclock secs ( 0.00 usr 0.00 sys + 0.00 cusr 0.00 csys = 0.00 CPU) - -Running actual benchmark -Time for wisc_benchmark (114): 5 wallclock secs ( 2.33 usr 0.52 sys + 0.00 cusr 0.00 csys = 2.85 CPU) - -Total time: 19 wallclock secs ( 3.92 usr 1.70 sys + 0.00 cusr 0.00 csys = 5.62 CPU) diff --git a/sql-bench/graph-compare-results.sh b/sql-bench/graph-compare-results.sh index 395ad272262..317ef583886 100644 --- a/sql-bench/graph-compare-results.sh +++ b/sql-bench/graph-compare-results.sh @@ -458,7 +458,7 @@ sub gd { # set a color per server so in every result it has the same color .... 
foreach $key (@key_order) { if ($tot{$key}{'server'} =~ /mysql/i) { - if ($key =~ /mysql_pgcc/i || $key =~ /mysql_odbc/i) { + if ($key =~ /mysql_pgcc/i || $key =~ /mysql_odbc/i || $key =~ /mysql_fast/i) { $tot{$key}{'color'} = $lblue; } else { $tot{$key}{'color'} = $blue; diff --git a/sql/field.cc b/sql/field.cc index 629ae899494..78f57c5ceb5 100644 --- a/sql/field.cc +++ b/sql/field.cc @@ -1593,7 +1593,7 @@ double Field_longlong::val_real(void) else #endif longlongget(j,ptr); - return unsigned_flag ? ulonglong2double(j) : (double) j; + return unsigned_flag ? ulonglong2double((ulonglong) j) : (double) j; } longlong Field_longlong::val_int(void) diff --git a/sql/ha_innobase.cc b/sql/ha_innobase.cc index 6bae31902c3..184c97837db 100644 --- a/sql/ha_innobase.cc +++ b/sql/ha_innobase.cc @@ -1523,6 +1523,10 @@ ha_innobase::update_row( DBUG_ENTER("ha_innobase::update_row"); + if (table->time_stamp) { + update_timestamp(new_row + table->time_stamp - 1); + } + if (last_query_id != user_thd->query_id) { prebuilt->sql_stat_start = TRUE; last_query_id = user_thd->query_id; diff --git a/sql/item_sum.cc b/sql/item_sum.cc index 375ba081f80..5b24a1eda90 100644 --- a/sql/item_sum.cc +++ b/sql/item_sum.cc @@ -809,8 +809,19 @@ bool Item_sum_count_distinct::setup(THD *thd) List<Item> list; /* Create a table with an unique key over all parameters */ for (uint i=0; i < arg_count ; i++) - if (list.push_back(args[i])) - return 1; + { + Item *item=args[i]; + if (list.push_back(item)) + return 1; // End of memory + if (item->const_item()) + { + (void) item->val_int(); + if (item->null_value) + always_null=1; + } + } + if (always_null) + return 0; count_field_types(tmp_table_param,list,0); if (table) { @@ -827,15 +838,20 @@ bool Item_sum_count_distinct::setup(THD *thd) void Item_sum_count_distinct::reset() { - table->file->extra(HA_EXTRA_NO_CACHE); - table->file->delete_all_rows(); - table->file->extra(HA_EXTRA_WRITE_CACHE); - (void) add(); + if (table) + { + table->file->extra(HA_EXTRA_NO_CACHE); + table->file->delete_all_rows(); + table->file->extra(HA_EXTRA_WRITE_CACHE); + (void) add(); + } } bool Item_sum_count_distinct::add() { int error; + if (always_null) + return 0; copy_fields(tmp_table_param); copy_funcs(tmp_table_param->funcs); diff --git a/sql/item_sum.h b/sql/item_sum.h index 8cb09c85623..f68dfee1b61 100644 --- a/sql/item_sum.h +++ b/sql/item_sum.h @@ -145,11 +145,12 @@ class Item_sum_count_distinct :public Item_sum_int table_map used_table_cache; bool fix_fields(THD *thd,TABLE_LIST *tables); TMP_TABLE_PARAM *tmp_table_param; + bool always_null; public: Item_sum_count_distinct(List<Item> &list) :Item_sum_int(list),table(0),used_table_cache(~(table_map) 0), - tmp_table_param(0) + tmp_table_param(0),always_null(0) { quick_group=0; } ~Item_sum_count_distinct(); table_map used_tables() const { return used_table_cache; } diff --git a/sql/log.cc b/sql/log.cc index 4cd93261973..1654f711af8 100644 --- a/sql/log.cc +++ b/sql/log.cc @@ -652,12 +652,16 @@ bool MYSQL_LOG::write(Query_log_event* event_info) if (thd->last_insert_id_used) { Intvar_log_event e((uchar)LAST_INSERT_ID_EVENT, thd->last_insert_id); + if(thd->server_id) + e.server_id = thd->server_id; if (e.write(file)) goto err; } if (thd->insert_id_used) { Intvar_log_event e((uchar)INSERT_ID_EVENT, thd->last_insert_id); + if(thd->server_id) + e.server_id = thd->server_id; if (e.write(file)) goto err; } diff --git a/sql/mysql_priv.h b/sql/mysql_priv.h index 9c134183fdd..f21c635dbdf 100644 --- a/sql/mysql_priv.h +++ b/sql/mysql_priv.h @@ -533,7 +533,7 @@ 
extern ulong keybuff_size,sortbuff_size,max_item_sort_length,table_cache_size, binlog_cache_size, max_binlog_cache_size; extern ulong specialflag, current_pid; extern bool low_priority_updates, using_update_log; -extern bool opt_sql_bin_update, opt_safe_show_db; +extern bool opt_sql_bin_update, opt_safe_show_db, opt_warnings; extern char language[LIBLEN],reg_ext[FN_EXTLEN],blob_newline; extern const char **errmesg; /* Error messages */ extern const char *default_tx_isolation_name; diff --git a/sql/mysqlbinlog.cc b/sql/mysqlbinlog.cc index c234e2421bf..5edfe6e0591 100644 --- a/sql/mysqlbinlog.cc +++ b/sql/mysqlbinlog.cc @@ -108,7 +108,7 @@ static void die(const char* fmt, ...) static void print_version() { - printf("%s Ver 1.3 for %s at %s\n",my_progname,SYSTEM_TYPE, MACHINE_TYPE); + printf("%s Ver 1.4 for %s at %s\n",my_progname,SYSTEM_TYPE, MACHINE_TYPE); } @@ -132,7 +132,7 @@ the mysql command line client\n\n"); -s, --short-form Just show the queries, no extra info\n\ -o, --offset=N Skip the first N entries\n\ -h, --host=server Get the binlog from server\n\ --P, --port=port Use port to connect to the remove server\n\ +-P, --port=port Use port to connect to the remote server\n\ -u, --user=username Connect to the remove server as username\n\ -p, --password=password Password to connect to remote server\n\ -r, --result-file=file Direct output to a given file\n\ diff --git a/sql/mysqld.cc b/sql/mysqld.cc index b155b313e8b..991b0e73c51 100644 --- a/sql/mysqld.cc +++ b/sql/mysqld.cc @@ -210,7 +210,7 @@ static char mysql_home[FN_REFLEN],pidfile_name[FN_REFLEN]; static pthread_t select_thread; static bool opt_log,opt_update_log,opt_bin_log,opt_slow_log,opt_noacl, opt_disable_networking=0, opt_bootstrap=0,opt_skip_show_db=0, - opt_ansi_mode=0,opt_myisam_log=0, + opt_ansi_mode=0,opt_myisam_log=0, opt_large_files=sizeof(my_off_t) > 4; bool opt_sql_bin_update = 0, opt_log_slave_updates = 0, opt_safe_show_db=0; FILE *bootstrap_file=0; @@ -275,7 +275,7 @@ ulong max_tmp_tables,max_heap_table_size; ulong bytes_sent = 0L, bytes_received = 0L; bool opt_endinfo,using_udf_functions,low_priority_updates, locked_in_memory; -bool opt_using_transactions, using_update_log; +bool opt_using_transactions, using_update_log, opt_warnings=0; bool volatile abort_loop,select_thread_in_use,grant_option; bool volatile ready_to_exit,shutdown_in_progress; ulong refresh_version=1L,flush_version=1L; /* Increments on each reload */ @@ -1190,7 +1190,7 @@ Some pointers may be invalid and cause the dump to abort...\n"); fprintf(stderr, "\n Successfully dumped variables, if you ran with --log, take a look at the\n\ details of what thread %ld did to cause the crash. 
In some cases of really\n -bad corruption, the above values may be invalid\n\n", +bad corruption, the values shown above may be invalid\n\n", thd->thread_id); } fprintf(stderr, "\ @@ -2455,7 +2455,7 @@ enum options { OPT_INNODB_LOG_ARCH_DIR, OPT_INNODB_LOG_ARCHIVE, OPT_INNODB_FLUSH_LOG_AT_TRX_COMMIT, - OPT_INNODB_UNIX_FILE_FLUSH_METHOD, + OPT_INNODB_UNIX_FILE_FLUSH_METHOD, OPT_SAFE_SHOW_DB, OPT_GEMINI_SKIP, OPT_INNODB_SKIP, OPT_TEMP_POOL, OPT_TX_ISOLATION, @@ -2516,7 +2516,7 @@ static struct option long_options[] = { OPT_INNODB_LOG_ARCHIVE}, {"innodb_flush_log_at_trx_commit", optional_argument, 0, OPT_INNODB_FLUSH_LOG_AT_TRX_COMMIT}, - {"innodb_unix_file_flush_method", required_argument, 0, + {"innodb_flush_method", required_argument, 0, OPT_INNODB_UNIX_FILE_FLUSH_METHOD}, #endif {"help", no_argument, 0, '?'}, @@ -2603,6 +2603,7 @@ static struct option long_options[] = { #endif {"user", required_argument, 0, 'u'}, {"version", no_argument, 0, 'V'}, + {"warnings", no_argument, 0, 'W'}, {0, 0, 0, 0} }; @@ -3010,6 +3011,8 @@ static void usage(void) Start without grant tables. This gives all users\n\ FULL ACCESS to all tables!\n\ --safe-mode Skip some optimize stages (for testing)\n\ + --safe-show-database Don't show databases for which the user has no\n\ + privileges\n\ --skip-concurrent-insert\n\ Don't use concurrent insert with MyISAM\n\ --skip-delay-key-write\n\ @@ -3033,7 +3036,8 @@ static void usage(void) Default transaction isolation level\n\ --temp-pool Use a pool of temporary files\n\ -u, --user=user_name Run mysqld daemon as user\n\ - -V, --version output version information and exit"); + -V, --version output version information and exit\n\ + -W, --warnings Log some non-critical warnings to the log file\n"); #ifdef __WIN__ puts("NT and Win32 specific options:\n\ --console Don't remove the console window\n\ @@ -3073,7 +3077,7 @@ static void usage(void) puts("\ --innodb_data_home_dir=dir The common part for Innodb table spaces\n\ --innodb_data_file_path=dir Path to individual files and their sizes\n\ - --innodb_flush_method=# Which method to flush data\n\ + --innodb_flush_method=# Which method to use to flush data\n\ --innodb_flush_log_at_trx_commit[=#]\n\ Set to 0 if you don't want to flush logs\n\ --innodb_log_arch_dir=dir Where full logs should be archived\n\ @@ -3167,7 +3171,7 @@ static void get_options(int argc,char **argv) int c,option_index=0; myisam_delay_key_write=1; // Allow use of this - while ((c=getopt_long(argc,argv,"ab:C:h:#::T::?l::L:O:P:sS::t:u:noVvI?", + while ((c=getopt_long(argc,argv,"ab:C:h:#::T::?l::L:O:P:sS::t:u:noVvWI?", long_options, &option_index)) != EOF) { switch(c) { @@ -3177,6 +3181,9 @@ static void get_options(int argc,char **argv) #endif opt_endinfo=1; /* unireg: memory allocation */ break; + case 'W': + opt_warnings=1; + break; case 'a': opt_ansi_mode=1; thd_startup_options|=OPTION_ANSI_MODE; diff --git a/sql/share/czech/errmsg.txt b/sql/share/czech/errmsg.txt index 35a428273c7..6d35e913ffd 100644 --- a/sql/share/czech/errmsg.txt +++ b/sql/share/czech/errmsg.txt @@ -218,3 +218,5 @@ "Lock wait timeout exceeded", "The total number of locks exceeds the lock table size", "Update locks cannot be acquired during a READ UNCOMMITTED transaction", +"DROP DATABASE not allowed while thread is holding global read lock", +"CREATE DATABASE not allowed while thread is holding global read lock", diff --git a/sql/share/danish/errmsg.txt b/sql/share/danish/errmsg.txt index b2fe6c4e800..d1e0ea71175 100644 --- a/sql/share/danish/errmsg.txt +++ b/sql/share/danish/errmsg.txt
@@ -212,3 +212,5 @@ "Lock wait timeout exceeded", "The total number of locks exceeds the lock table size", "Update locks cannot be acquired during a READ UNCOMMITTED transaction", +"DROP DATABASE not allowed while thread is holding global read lock", +"CREATE DATABASE not allowed while thread is holding global read lock", diff --git a/sql/share/dutch/errmsg.txt b/sql/share/dutch/errmsg.txt index 616f832bee8..7ae6c564283 100644 --- a/sql/share/dutch/errmsg.txt +++ b/sql/share/dutch/errmsg.txt @@ -209,3 +209,5 @@ "Lock wait timeout exceeded", "The total number of locks exceeds the lock table size", "Update locks cannot be acquired during a READ UNCOMMITTED transaction", +"DROP DATABASE not allowed while thread is holding global read lock", +"CREATE DATABASE not allowed while thread is holding global read lock", diff --git a/sql/share/english/errmsg.txt b/sql/share/english/errmsg.txt index 018d558d7de..2a6e23b6281 100644 --- a/sql/share/english/errmsg.txt +++ b/sql/share/english/errmsg.txt @@ -209,3 +209,5 @@ "Lock wait timeout exceeded", "The total number of locks exceeds the lock table size", "Update locks cannot be acquired during a READ UNCOMMITTED transaction", +"DROP DATABASE not allowed while thread is holding global read lock", +"CREATE DATABASE not allowed while thread is holding global read lock", diff --git a/sql/share/estonian/errmsg.txt b/sql/share/estonian/errmsg.txt index e1e03e4a596..264badebe38 100644 --- a/sql/share/estonian/errmsg.txt +++ b/sql/share/estonian/errmsg.txt @@ -213,3 +213,5 @@ "Lock wait timeout exceeded", "The total number of locks exceeds the lock table size", "Update locks cannot be acquired during a READ UNCOMMITTED transaction", +"DROP DATABASE not allowed while thread is holding global read lock", +"CREATE DATABASE not allowed while thread is holding global read lock", diff --git a/sql/share/french/errmsg.txt b/sql/share/french/errmsg.txt index aadfecbc8a1..0da5cf94ed8 100644 --- a/sql/share/french/errmsg.txt +++ b/sql/share/french/errmsg.txt @@ -209,3 +209,5 @@ "Lock wait timeout exceeded", "The total number of locks exceeds the lock table size", "Update locks cannot be acquired during a READ UNCOMMITTED transaction", +"DROP DATABASE not allowed while thread is holding global read lock", +"CREATE DATABASE not allowed while thread is holding global read lock", diff --git a/sql/share/german/errmsg.txt b/sql/share/german/errmsg.txt index 2f41fbf30c2..9abbb3a8a2f 100644 --- a/sql/share/german/errmsg.txt +++ b/sql/share/german/errmsg.txt @@ -212,3 +212,5 @@ "Lock wait timeout exceeded", "The total number of locks exceeds the lock table size", "Update locks cannot be acquired during a READ UNCOMMITTED transaction", +"DROP DATABASE not allowed while thread is holding global read lock", +"CREATE DATABASE not allowed while thread is holding global read lock", diff --git a/sql/share/greek/errmsg.txt b/sql/share/greek/errmsg.txt index 5022bb65792..8f81fcfda31 100644 --- a/sql/share/greek/errmsg.txt +++ b/sql/share/greek/errmsg.txt @@ -209,3 +209,5 @@ "Lock wait timeout exceeded", "The total number of locks exceeds the lock table size", "Update locks cannot be acquired during a READ UNCOMMITTED transaction", +"DROP DATABASE not allowed while thread is holding global read lock", +"CREATE DATABASE not allowed while thread is holding global read lock", diff --git a/sql/share/hungarian/errmsg.txt b/sql/share/hungarian/errmsg.txt index cfdd4b7fe75..84d8c56cd04 100644 --- a/sql/share/hungarian/errmsg.txt +++ b/sql/share/hungarian/errmsg.txt @@ -211,3 +211,5 @@ "Lock wait 
timeout exceeded", "The total number of locks exceeds the lock table size", "Update locks cannot be acquired during a READ UNCOMMITTED transaction", +"DROP DATABASE not allowed while thread is holding global read lock", +"CREATE DATABASE not allowed while thread is holding global read lock", diff --git a/sql/share/italian/errmsg.txt b/sql/share/italian/errmsg.txt index d1b17bc8f2e..b85dc03286a 100644 --- a/sql/share/italian/errmsg.txt +++ b/sql/share/italian/errmsg.txt @@ -199,13 +199,15 @@ "La tabella '%-.64s' e` segnalata come corrotta e l'ultima ricostruzione (automatica?) e` fallita", "Attenzione: Alcune delle modifiche alle tabelle non transazionali non possono essere ripristinate (roll back impossibile)", "La transazione a comandi multipli (multi-statement) ha richiesto piu` di 'max_binlog_cache_size' bytes di disco: aumentare questa variabile di mysqld e riprovare', -"This operation cannot be performed with a running slave, run SLAVE STOP first", -"This operation requires a running slave, configure slave and do SLAVE START", -"The server is not configured as slave, fix in config file or with CHANGE MASTER TO", -"Could not initialize master info structure, check permisions on master.info", -"Could not create slave thread, check system resources", -"User %-.64s has already more than 'max_user_connections' active connections", -"You may only use constant expressions with SET", -"Lock wait timeout exceeded", -"The total number of locks exceeds the lock table size", -"Update locks cannot be acquired during a READ UNCOMMITTED transaction", +"Questa operazione non puo' essere eseguita con un database 'slave' che gira, lanciare prima SLAVE STOP", +"Questa operaione richiede un database 'slave', configurarlo ed eseguire SLAVE START", +"Il server non e' configurato come 'slave', correggere il file di configurazione cambiando CHANGE MASTER TO", +"Impossibile inizializzare la struttura 'master info', controllare i permessi sul file master.info", +"Impossibile creare il thread 'slave', controllare le risorse di sistema", +"L'utente %-.64s ha gia' piu' di 'max_user_connections' connessioni attive", +"Si possono usare solo espressioni costanti con SET", +"E' scaduto il timeout per l'attesa del lock", +"Il numero totale di lock e' maggiore della grandezza della tabella di lock", +"I lock di aggiornamento non possono essere acquisiti durante una transazione 'READ UNCOMMITTED'", +"DROP DATABASE not allowed while thread is holding global read lock", +"CREATE DATABASE not allowed while thread is holding global read lock", diff --git a/sql/share/japanese/errmsg.txt b/sql/share/japanese/errmsg.txt index 9dfe9bb3efb..49e58079588 100644 --- a/sql/share/japanese/errmsg.txt +++ b/sql/share/japanese/errmsg.txt @@ -211,3 +211,5 @@ "Lock wait timeout exceeded", "The total number of locks exceeds the lock table size", "Update locks cannot be acquired during a READ UNCOMMITTED transaction", +"DROP DATABASE not allowed while thread is holding global read lock", +"CREATE DATABASE not allowed while thread is holding global read lock", diff --git a/sql/share/korean/errmsg.txt b/sql/share/korean/errmsg.txt index 4f0f90f88ce..2e278dbd129 100644 --- a/sql/share/korean/errmsg.txt +++ b/sql/share/korean/errmsg.txt @@ -209,3 +209,5 @@ "Lock wait timeout exceeded", "The total number of locks exceeds the lock table size", "Update locks cannot be acquired during a READ UNCOMMITTED transaction", +"DROP DATABASE not allowed while thread is holding global read lock", +"CREATE DATABASE not allowed while thread is holding global 
read lock", diff --git a/sql/share/norwegian-ny/errmsg.txt b/sql/share/norwegian-ny/errmsg.txt index 99238d61e3e..df9efbd28a4 100644 --- a/sql/share/norwegian-ny/errmsg.txt +++ b/sql/share/norwegian-ny/errmsg.txt @@ -211,3 +211,5 @@ "Lock wait timeout exceeded", "The total number of locks exceeds the lock table size", "Update locks cannot be acquired during a READ UNCOMMITTED transaction", +"DROP DATABASE not allowed while thread is holding global read lock", +"CREATE DATABASE not allowed while thread is holding global read lock", diff --git a/sql/share/norwegian/errmsg.txt b/sql/share/norwegian/errmsg.txt index 473d297b649..c95669aa016 100644 --- a/sql/share/norwegian/errmsg.txt +++ b/sql/share/norwegian/errmsg.txt @@ -211,3 +211,5 @@ "Lock wait timeout exceeded", "The total number of locks exceeds the lock table size", "Update locks cannot be acquired during a READ UNCOMMITTED transaction", +"DROP DATABASE not allowed while thread is holding global read lock", +"CREATE DATABASE not allowed while thread is holding global read lock", diff --git a/sql/share/polish/errmsg.txt b/sql/share/polish/errmsg.txt index 253d4afd2b7..d708bc6fffb 100644 --- a/sql/share/polish/errmsg.txt +++ b/sql/share/polish/errmsg.txt @@ -213,3 +213,5 @@ "Lock wait timeout exceeded", "The total number of locks exceeds the lock table size", "Update locks cannot be acquired during a READ UNCOMMITTED transaction", +"DROP DATABASE not allowed while thread is holding global read lock", +"CREATE DATABASE not allowed while thread is holding global read lock", diff --git a/sql/share/portuguese/errmsg.txt b/sql/share/portuguese/errmsg.txt index a1247a1f1b3..b1cab63c0a0 100644 --- a/sql/share/portuguese/errmsg.txt +++ b/sql/share/portuguese/errmsg.txt @@ -209,3 +209,5 @@ "Lock wait timeout exceeded", "The total number of locks exceeds the lock table size", "Update locks cannot be acquired during a READ UNCOMMITTED transaction", +"DROP DATABASE not allowed while thread is holding global read lock", +"CREATE DATABASE not allowed while thread is holding global read lock", diff --git a/sql/share/romanian/errmsg.txt b/sql/share/romanian/errmsg.txt index 384df0c864e..8069f9907bb 100644 --- a/sql/share/romanian/errmsg.txt +++ b/sql/share/romanian/errmsg.txt @@ -213,3 +213,5 @@ "Lock wait timeout exceeded", "The total number of locks exceeds the lock table size", "Update locks cannot be acquired during a READ UNCOMMITTED transaction", +"DROP DATABASE not allowed while thread is holding global read lock", +"CREATE DATABASE not allowed while thread is holding global read lock", diff --git a/sql/share/russian/errmsg.txt b/sql/share/russian/errmsg.txt index 7dd24c743bb..6bc845d5599 100644 --- a/sql/share/russian/errmsg.txt +++ b/sql/share/russian/errmsg.txt @@ -212,3 +212,5 @@ "Lock wait timeout exceeded", "The total number of locks exceeds the lock table size", "Update locks cannot be acquired during a READ UNCOMMITTED transaction", +"DROP DATABASE not allowed while thread is holding global read lock", +"CREATE DATABASE not allowed while thread is holding global read lock", diff --git a/sql/share/slovak/errmsg.txt b/sql/share/slovak/errmsg.txt index 2a6063b6aee..8631ee6bdeb 100644 --- a/sql/share/slovak/errmsg.txt +++ b/sql/share/slovak/errmsg.txt @@ -217,3 +217,5 @@ "Lock wait timeout exceeded", "The total number of locks exceeds the lock table size", "Update locks cannot be acquired during a READ UNCOMMITTED transaction", +"DROP DATABASE not allowed while thread is holding global read lock", +"CREATE DATABASE not allowed while thread 
is holding global read lock", diff --git a/sql/share/spanish/errmsg.txt b/sql/share/spanish/errmsg.txt index dbf7caf585d..ea97a282c83 100644 --- a/sql/share/spanish/errmsg.txt +++ b/sql/share/spanish/errmsg.txt @@ -210,3 +210,5 @@ "Lock wait timeout exceeded", "The total number of locks exceeds the lock table size", "Update locks cannot be acquired during a READ UNCOMMITTED transaction", +"DROP DATABASE not allowed while thread is holding global read lock", +"CREATE DATABASE not allowed while thread is holding global read lock", diff --git a/sql/share/swedish/errmsg.txt b/sql/share/swedish/errmsg.txt index fc26a08e9ee..7f43afd04b6 100644 --- a/sql/share/swedish/errmsg.txt +++ b/sql/share/swedish/errmsg.txt @@ -209,3 +209,5 @@ "Lock wait timeout exceeded", "The total number of locks exceeds the lock table size", "Update locks cannot be acquired during a READ UNCOMMITTED transaction", +"DROP DATABASE not allowed while thread is holding global read lock", +"CREATE DATABASE not allowed while thread is holding global read lock", diff --git a/sql/slave.cc b/sql/slave.cc index 5f3f37d2cb8..a768e8494a0 100644 --- a/sql/slave.cc +++ b/sql/slave.cc @@ -1367,7 +1367,7 @@ the slave thread with \"mysqladmin start-slave\". We stopped at log \ { // show a little mercy, allow slave to read one more event // before cutting him off - otherwise he gets stuck - // on Invar events, since they do not advance the offset + // on Intvar events, since they do not advance the offset // immediately if (++stuck_count > 2) events_till_disconnect++; diff --git a/sql/sql_base.cc b/sql/sql_base.cc index d068dd500bc..177df8f1e81 100644 --- a/sql/sql_base.cc +++ b/sql/sql_base.cc @@ -498,11 +498,12 @@ void close_temporary(TABLE *table,bool delete_table) void close_temporary_tables(THD *thd) { TABLE *table,*next; - uint init_query_buf_size = 11, query_buf_size; // "drop table " - char* query, *p; + char *query, *end; + const uint init_query_buf_size = 11; // "drop table " + uint query_buf_size; bool found_user_tables = 0; - LINT_INIT(p); + LINT_INIT(end); query_buf_size = init_query_buf_size; for (table=thd->temporary_tables ; table ; table=table->next) @@ -510,37 +511,37 @@ void close_temporary_tables(THD *thd) query_buf_size += table->key_length; } - if(query_buf_size == init_query_buf_size) + if (query_buf_size == init_query_buf_size) return; // no tables to close - if((query = alloc_root(&thd->mem_root, query_buf_size))) - { - memcpy(query, "drop table ", init_query_buf_size); - p = query + init_query_buf_size; - } + if ((query = alloc_root(&thd->mem_root, query_buf_size))) + { + memcpy(query, "drop table ", init_query_buf_size); + end = query + init_query_buf_size; + } for (table=thd->temporary_tables ; table ; table=next) { - if(query) // we might be out of memory, but this is not fatal + if (query) // we might be out of memory, but this is not fatal + { + // skip temporary tables not created directly by the user + if (table->table_name[0] != '#') { - // skip temporary tables not created directly by the user - if(table->table_name[0] != '#') - { - p = strxmov(p,table->table_cache_key,".", - table->table_name,",", NullS); - // here we assume table_cache_key always starts - // with \0 terminated db name - found_user_tables = 1; - } + end = strxmov(end,table->table_cache_key,".", + table->table_name,",", NullS); + // here we assume table_cache_key always starts + // with \0 terminated db name + found_user_tables = 1; } + } next=table->next; close_temporary(table); } if (query && found_user_tables && 
mysql_bin_log.is_open()) { uint save_query_len = thd->query_length; - *--p = 0; - thd->query_length = (uint)(p-query); + *--end = 0; // Remove last ',' + thd->query_length = (uint)(end-query); Query_log_event qinfo(thd, query); mysql_bin_log.write(&qinfo); thd->query_length = save_query_len; diff --git a/sql/sql_db.cc b/sql/sql_db.cc index 5243498f7fc..7cd892f6079 100644 --- a/sql/sql_db.cc +++ b/sql/sql_db.cc @@ -38,6 +38,32 @@ void mysql_create_db(THD *thd, char *db, uint create_options) DBUG_ENTER("mysql_create_db"); VOID(pthread_mutex_lock(&LOCK_mysql_create_db)); + VOID(pthread_mutex_lock(&LOCK_open)); + + // do not create database if another thread is holding read lock + if (global_read_lock) + { + if (thd->global_read_lock) + { + net_printf(&thd->net, ER_CREATE_DB_WITH_READ_LOCK); + VOID(pthread_mutex_unlock(&LOCK_open)); + goto exit; + } + while (global_read_lock && ! thd->killed) + { + (void) pthread_cond_wait(&COND_refresh,&LOCK_open); + } + + if (thd->killed) + { + net_printf(&thd->net, ER_SERVER_SHUTDOWN); + VOID(pthread_mutex_unlock(&LOCK_open)); + goto exit; + } + + } + + VOID(pthread_mutex_unlock(&LOCK_open)); /* Check directory */ (void)sprintf(path,"%s/%s", mysql_data_home, db); @@ -105,6 +131,26 @@ void mysql_rm_db(THD *thd,char *db,bool if_exists) VOID(pthread_mutex_lock(&LOCK_mysql_create_db)); VOID(pthread_mutex_lock(&LOCK_open)); + // do not drop database if another thread is holding read lock + if (global_read_lock) + { + if (thd->global_read_lock) + { + net_printf(&thd->net, ER_DROP_DB_WITH_READ_LOCK); + goto exit; + } + while (global_read_lock && ! thd->killed) + { + (void) pthread_cond_wait(&COND_refresh,&LOCK_open); + } + + if (thd->killed) + { + net_printf(&thd->net, ER_SERVER_SHUTDOWN); + goto exit; + } + } + (void) sprintf(path,"%s/%s",mysql_data_home,db); unpack_dirname(path,path); // Convert if not unix /* See if the directory exists */ diff --git a/sql/sql_lex.cc b/sql/sql_lex.cc index b8d2ee13b0e..a78fef62657 100644 --- a/sql/sql_lex.cc +++ b/sql/sql_lex.cc @@ -650,12 +650,9 @@ int yylex(void *arg) if (c == 'e' || c == 'E') { c = yyGet(); - if (c != '-' && c != '+' && !isdigit(c)) - { // No exp sig found - state= STATE_CHAR; - break; - } - if (!isdigit(yyGet())) + if (c == '-' || c == '+') + c = yyGet(); // Skip sign + if (!isdigit(c)) { // No digit after sign state= STATE_CHAR; break; diff --git a/sql/sql_parse.cc b/sql/sql_parse.cc index 9b50be8a3c2..ef2919c2e6c 100644 --- a/sql/sql_parse.cc +++ b/sql/sql_parse.cc @@ -554,6 +554,7 @@ pthread_handler_decl(handle_one_connection,arg) free_root(&thd->mem_root,MYF(0)); if (net->error && net->vio != 0) { + if (!thd->killed && ! opt_warnings) sql_print_error(ER(ER_NEW_ABORTING_CONNECTION), thd->thread_id,(thd->db ? thd->db : "unconnected"), thd->user ?
thd->user : "unauthenticated", diff --git a/sql/sql_select.cc b/sql/sql_select.cc index f464c237d78..82b73e5a48f 100644 --- a/sql/sql_select.cc +++ b/sql/sql_select.cc @@ -400,7 +400,22 @@ mysql_select(THD *thd,TABLE_LIST *tables,List<Item> &fields,COND *conds, goto err; /* purecov: inspected */ } if (join.const_tables && !thd->locked_tables) + { + TABLE **table, **end; + for (table=join.table, end=table + join.const_tables ; + table != end; + table++) + { + /* BDB tables require that we call index_end() before doing an unlock */ + if ((*table)->key_read) + { + (*table)->key_read=0; + (*table)->file->extra(HA_EXTRA_NO_KEYREAD); + } + (*table)->file->index_end(); + } mysql_unlock_some_tables(thd, join.table,join.const_tables); + } if (!conds && join.outer_join) { /* Handle the case where we have an OUTER JOIN without a WHERE */ @@ -2761,7 +2776,12 @@ return_zero_rows(select_result *result,TABLE_LIST *tables,List<Item> &fields, if (send_row) result->send_data(fields); if (tables) // Not from do_select() + { + /* Close open cursors */ + for (TABLE_LIST *table=tables; table ; table=table->next) + table->table->file->index_end(); result->send_eof(); // Should be safe + } } DBUG_RETURN(0); } diff --git a/sql/sql_show.cc b/sql/sql_show.cc index 39ddb79e9de..6f99495d94d 100644 --- a/sql/sql_show.cc +++ b/sql/sql_show.cc @@ -839,18 +839,22 @@ store_create_info(THD *thd, TABLE *table, String *packet) for (uint i=0 ; i < table->keys ; i++,key_info++) { + KEY_PART_INFO *key_part= key_info->key_part; + bool found_primary=0; packet->append(",\n ", 4); - KEY_PART_INFO *key_part= key_info->key_part; - if (i == primary_key) + if (i == primary_key && !strcmp(key_info->name,"PRIMARY")) + { + found_primary=1; packet->append("PRIMARY ", 8); + } else if (key_info->flags & HA_NOSAME) packet->append("UNIQUE ", 7); else if (key_info->flags & HA_FULLTEXT) packet->append("FULLTEXT ", 9); packet->append("KEY ", 4); - if (i != primary_key) + if (!found_primary) append_identifier(thd,packet,key_info->name); packet->append(" (", 2); diff --git a/sql/time.cc b/sql/time.cc index 086977af72f..e0b74fc9d25 100644 --- a/sql/time.cc +++ b/sql/time.cc @@ -455,8 +455,8 @@ str_to_TIME(const char *str, uint length, TIME *l_time,bool fuzzy_date) if ((date[i]=tmp_value)) date_used=1; // Found something if (i == 2 && str != end && *str == 'T') - str++; // ISO8601: CCYYMMDDThhmmss - else + str++; // ISO8601: CCYYMMDDThhmmss + else if ( i != 5 ) // Skip inter-field delimiters { while (str != end && (ispunct(*str) || isspace(*str))) { |