diff options
37 files changed, 2264 insertions, 296 deletions
diff --git a/BUILD/FINISH.sh b/BUILD/FINISH.sh index 4f13f5f8e4d..368ab339c2b 100644 --- a/BUILD/FINISH.sh +++ b/BUILD/FINISH.sh @@ -15,6 +15,10 @@ $make -k clean || true aclocal && autoheader && aclocal && automake && autoconf (cd bdb/dist && sh s_all) (cd innobase && aclocal && autoheader && aclocal && automake && autoconf) +if [ -d gemini ] +then + (cd gemini && aclocal && autoheader && aclocal && automake && autoconf) +fi CFLAGS=\"$cflags\" CXX=gcc CXXFLAGS=\"$cxxflags\" $configure" diff --git a/BitKeeper/etc/logging_ok b/BitKeeper/etc/logging_ok index f517c9a46cc..7902f10c3d3 100644 --- a/BitKeeper/etc/logging_ok +++ b/BitKeeper/etc/logging_ok @@ -1,6 +1,7 @@ -mwagner@evoq.mwagner.org -sasha@mysql.sashanet.com heikki@donna.mysql.fi miguel@linux.local -monty@tik.mysql.fi +mikef@nslinux.bedford.progress.com monty@donna.mysql.fi +monty@tik.mysql.fi +mwagner@evoq.mwagner.org +sasha@mysql.sashanet.com diff --git a/Docs/manual.texi b/Docs/manual.texi index 5437b29fb53..ead53187c27 100644 --- a/Docs/manual.texi +++ b/Docs/manual.texi @@ -528,10 +528,25 @@ BDB or Berkeley_DB Tables GEMINI Tables -* GEMINI overview:: -* GEMINI start:: -* GEMINI features:: -* GEMINI TODO:: +* GEMINI Overview:: +* Using GEMINI Tables:: + +GEMINI Overview + +* GEMINI Features:: +* GEMINI Concepts:: +* GEMINI Limitations:: + +Using GEMINI Tables + +* Startup Options:: +* Creating GEMINI Tables:: +* Backing Up GEMINI Tables:: +* Restoring GEMINI Tables:: +* Using Auto_Increment Columns With GEMINI Tables:: +* Performance Considerations:: +* Sample Configurations:: +* When To Use GEMINI Tables:: InnoDB Tables @@ -10096,7 +10111,7 @@ If you are using BDB (Berkeley DB) tables, you should familiarize yourself with the different BDB specific startup options. @xref{BDB start}. If you are using Gemini tables, refer to the Gemini-specific startup options. -@xref{GEMINI start}. +@xref{Using GEMINI Tables}. If you are using InnoDB tables, refer to the InnoDB-specific startup options. 
@xref{InnoDB start}. @@ -18845,7 +18860,7 @@ When you insert a value of @code{NULL} (recommended) or @code{0} into an If you delete the row containing the maximum value for an @code{AUTO_INCREMENT} column, the value will be reused with an -@code{ISAM}, @code{BDB} or @code{INNODB} table but not with a +@code{ISAM}, @code{GEMINI}, @code{BDB} or @code{INNODB} table but not with a @code{MyISAM} table. If you delete all rows in the table with @code{DELETE FROM table_name} (without a @code{WHERE}) in @code{AUTOCOMMIT} mode, the sequence starts over for both table types. @@ -24535,87 +24550,849 @@ not in @code{auto_commit} mode, until this problem is fixed (the fix is not trivial). @end itemize -@cindex tables, @code{GEMINI} +@cindex GEMINI tables @node GEMINI, InnoDB, BDB, Table types @section GEMINI Tables +@cindex GEMINI tables, overview @menu -* GEMINI overview:: -* GEMINI start:: -* GEMINI features:: -* GEMINI TODO:: +* GEMINI Overview:: +* Using GEMINI Tables:: @end menu -@node GEMINI overview, GEMINI start, GEMINI, GEMINI -@subsection Overview of GEMINI tables +@node GEMINI Overview, Using GEMINI Tables, GEMINI, GEMINI +@subsection GEMINI Overview + +@code{GEMINI} is a transaction-safe table handler for @strong{MySQL}. It +provides row-level locking, robust transaction support and reliable +crash recovery. It is targeted for databases that need to handle heavy +multi-user updates typical of transaction processing applications while +still providing excellent performance for read-intensive operations. The +@code{GEMINI} table type is developed and supported by NuSphere +Corporation (see @url{http://www.nusphere.com}). + +@code{GEMINI} provides full ACID transaction properties (Atomic, +Consistent, Independent, and Durable) with a programming model that +includes support for statement atomicity and all four standard isolation +levels (Read Uncommitted, Read Committed, Repeatable Read, and +Serializable) defined in the SQL standard. 
+ +The @code{GEMINI} tables support row-level and table-level locking to +increase concurrency in applications and allow reading of tables without +locking for maximum concurrency in a heavy update environment. The +transaction, locking, and recovery mechanisms are tightly integrated to +eliminate unnecessary administration overhead. + +In general, if @code{GEMINI} tables are selected for an application, it +is recommended that all tables updated in the application be +@code{GEMINI} tables to provide well-defined system behavior. If +non-@code{GEMINI} tables are mixed into the application then, ACID +transaction properties cannot be maintained. While there are clearly +cases where mixing table types is appropriate, it should always be done +with careful consideration of the impact on transaction consistency and +recoverability needs of the application and underlying database. + +The @code{GEMINI} table type is derived from a successful commercial +database and uses the storage kernel technology tightly integrated with +@strong{MySQL} server. The basic @code{GEMINI} technology is in use by +millions of users worldwide in production environments today. This +maturity allows @code{GEMINI} tables to provide a solution for those +users who require transaction-based behavior as part of their +applications. -The @code{GEMINI} table type is developed and supported by NuSphere Corporation -(@uref{http://www.nusphere.com}). It features row-level locking, transaction -support (@code{COMMIT} and @code{ROLLBACK}), and automatic crash recovery. +The @code{GEMINI} table handler supports a configurable data cache that +allows a significant portion of any database to be maintained in memory +while still allowing durable updates. -@code{GEMINI} tables will be included in some future @strong{MySQL} 3.23.X -source distribution. 
+@cindex GEMINI tables, features +@menu +* GEMINI Features:: +* GEMINI Concepts:: +* GEMINI Limitations:: +@end menu -@node GEMINI start, GEMINI features, GEMINI overview, GEMINI -@subsection GEMINI startup options +@node GEMINI Features, GEMINI Concepts, GEMINI Overview, GEMINI Overview +@subsubsection GEMINI Features -If you are running with @code{AUTOCOMMIT=0} then your changes in @code{GEMINI} -tables will not be updated until you execute @code{COMMIT}. Instead of commit -you can execute @code{ROLLBACK} to forget your changes. @xref{COMMIT}. +The following summarizes the major features provided by @code{GEMINI} +tables. -If you are running with @code{AUTOCOMMIT=1} (the default), your changes -will be committed immediately. You can start an extended transaction with -the @code{BEGIN WORK} SQL command, after which your changes will not be -committed until you execute @code{COMMIT} (or decide to @code{ROLLBACK} -the changes). +@itemize @bullet +@item +Supports all optimization statistics used by the @strong{MySQL} optimizer +including table cardinality, index range estimates and multi-component +selectivity to insure optimal query performance. -The following options to @code{mysqld} can be used to change the behavior of -GEMINI tables: +@item +Maintains exact cardinality information for each table so @code{SELECT +COUNT(*) FROM} table-name always returns an answer immediately. + +@item +Supports index-only queries; when index data is sufficient to resolve a +query no record data is read (for non character types). + +@item +@code{GEMINI} uses block based I/O for better performance. There is no +performance penalty for using @code{VARCHAR} fields. The maximum record size is +currently 32K. + +@item +The number of rows in a single @code{GEMINI} table can be 4 quintillion +(full use of 64 bits). + +@item +Individual tables can be as large as 16 petabytes. 
+ +@item +Locking is done at a record or row level rather than at table level +unless table locks are explicitly requested. When a row is inserted into +a table, other rows can be updated, inserted or deleted without waiting +for the inserted row to be committed. + +@item +Provides durable transactions backed by a crash recovery mechanism that +returns the database to a known consistent state in the event of an +unexpected failure. + +@item +Support for all isolation levels and statement atomicity defined in the +SQL standard. + +@item +Reliable Master Replication; the master database can survive system +failure and recover all committed transactions. +@end itemize + +@cindex GEMINI tables, concepts +@node GEMINI Concepts, GEMINI Limitations, GEMINI Features, GEMINI Overview +@subsubsection GEMINI Concepts + +This section highlights some of the important concepts behind +@code{GEMINI} and the @code{GEMINI} programming model, including: + +@itemize @bullet +@item +ACID Transactions +@item +Transaction COMMIT/ROLLBACK +@item +Statement Atomicity +@item +Recovery +@item +Isolation Levels +@item +Row-Level Locking +@end itemize + +These features are described below. + +@cindex GEMINI tables, ACID transactions +@noindent +@strong{ACID Transactions} + +ACID in the context of transactions is an acronym which stands for +@emph{Atomicity}, @emph{Consistency}, @emph{Isolation}, @emph{Durability}. + +@multitable @columnfractions .25 .75 +@item @sc{Attribute} @tab @sc{Description} +@item +@strong{Atomicity} +@tab A transaction allows for the grouping of one or more changes to +tables and rows in the database to form an atomic or indivisible +operation. That is, either all of the changes occur or none of them +do. If for any reason the transaction cannot be completed, everything +this transaction changed can be restored to the state it was in prior to +the start of the transaction via a rollback operation. 
+ +@item +@strong{Consistency} +@tab +Transactions always operate on a consistent view of the data and when +they end always leave the data in a consistent state. Data may be said to +be consistent as long as it conforms to a set of invariants, such as no +two rows in the customer table have the same customer ID and all orders +have an associated customer row. While a transaction executes, these +invariants may be violated, but no other transaction will be allowed to +see these inconsistencies, and all such inconsistencies will have been +eliminated by the time the transaction ends. + +@item +@strong{Isolation} +@tab To a given transaction, it should appear as though it is running +all by itself on the database. The effects of concurrently running +transactions are invisible to this transaction, and the effects of this +transaction are invisible to others until the transaction is committed. + +@item +@strong{Durability} +@tab Once a transaction is committed, its effects are guaranteed to +persist even in the event of subsequent system failures. Until the +transaction commits, not only are any changes made by that transaction +not durable, but are guaranteed not to persist in the face of a system +failures, as crash recovery will rollback their effects. +@end multitable + +@cindex GEMINI tables, COMMIT/ROLLBACK +@noindent +@strong{Transaction COMMIT/ROLLBACK} + +As stated above, a transaction is a group of work being done to +data. Unless otherwise directed, @strong{MySQL} considers each statement +a transaction in itself. Multiple updates can be accomplished by placing +them in a single statement, however they are limited to a single table. + +Applications tend to require more robust use of transaction +concepts. Take, for example, a system that processes an order: A row may +be inserted in an order table, additional rows may be added to an +order-line table, updates may be made to inventory tables, etc. 
It is +important that if the order completes, all the changes are made to all +the tables involved; likewise if the order fails, none of the changes to +the tables must occur. To facilitate this requirement, @strong{MySQL} +has syntax to start a transaction called @code{BEGIN WORK}. All +statements that occur after the @code{BEGIN WORK} statement are grouped +into a single transaction. The end of this transaction occurs when a +@code{COMMIT} or @code{ROLLBACK} statement is encountered. After the +@code{COMMIT} or @code{ROLLBACK} the system returns back to the behavior +before the @code{BEGIN WORK} statement was encountered where every +statement is a transaction. + +To permanently turn off the behavior where every statement is a +transaction, @strong{MySQL} added a variable called +@code{AUTOCOMMIT}. The @code{AUTOCOMMIT} variable can have two values, +@code{1} and @code{0}. The mode where every statement is a transaction +is when @code{AUTOCOMMIT} is set to @code{1} (@code{AUTOCOMMIT=1}). When +@code{AUTOCOMMIT} is set to @code{0} (@code{AUTOCOMMIT=0}), then every +statement is part of the same transaction until the transaction end by +either @code{COMMIT} or @code{ROLLBACK}. Once a transaction completes, a +new transaction is immediately started and the process repeats. + +Here is an example of the SQL statements that you may find in a typical +order: + +@example +BEGIN WORK; + INSERT INTO order VALUES ...; + INSERT INTO order-lines VALUES ...; + INSERT INTO order-lines VALUES ...; + INSERT INTO order-lines VALUES ...; + UPDATE inventory WHERE ...; +COMMIT; +@end example + +This example shows how to use the @code{BEGIN WORK} statement to start a +transaction. If the variable @code{AUTOCOMMIT} is set to @code{0}, then +a transaction would have been started already. In this case, the +@code{BEGIN WORK} commits the current transaction and starts a new one. 
+ +@cindex GEMINI tables, statement atomicity +@noindent +@strong{Statement Atomicity} + +As mentioned above, when running with @code{AUTOCOMMIT} set to @code{1}, +each statement executes as a single transaction. When a statement has an +error, then all changes make by the statement must be +undone. Transactions support this behavior. Non-transaction safe table +handlers would have a partial statement update where some of the changes +from the statement would be contained in the database and other changes +from the statement would not. Work would need to be done to manually +recover from the error. + +@cindex GEMINI tables, recovery +@noindent +@strong{Recovery} + +Transactions are the basis for database recovery. Recovery is what +supports the Durability attribute of the ACID transaction. + +@code{GEMINI} uses a separate file called the Recovery Log located in +the @code{$DATADIR} directory named @code{gemini.rl}. This file +maintains the integrity of all the @code{GEMINI} tables. @code{GEMINI} +can not recover any data from non-@code{GEMINI} tables. In addition, the +@code{gemini.rl} file is used to rollback transactions in support of the +@code{ROLLBACK} statement. + +In the event of a system failure, the next time the @strong{MySQL} +server is started, @code{GEMINI} will automatically go through its +crash recovery process. The result of crash recovery is that all the +@code{GEMINI} tables will contain the latest changes made to them, and +all transactions that were open at the time of the crash will have been +rolled back. + +The @code{GEMINI} Recovery Log reuses space when it can. Space can be +reused when information in the Recovery Log is no longer needed for +crash recovery or rollback. 
+ +@cindex GEMINI tables, isolation levels +@noindent +@strong{Isolation Levels} + +There are four isolation levels supported by @code{GEMINI}: + +@itemize @bullet +@item +READ UNCOMMITTED +@item +READ COMMITTED +@item +REPEATABLE READ +@item +SERIALIZABLE +@end itemize + +These isolation levels apply only to shared locks obtained by select +statements, excluding select for update. Statements that get exclusive +locks always retain those locks until the transaction commits or rolls +back. + +By default, @code{GEMINI} operates at the @code{READ COMMITTED} +level. You can override the default using the following command: + +@example +SET [GLOBAL | SESSION] TRANSACTION ISOLATION LEVEL [READ UNCOMMITTED | +READ COMMITTED | REPEATABLE READ | SERIALIZABLE ] +@end example + +If the @code{SESSION} qualifier used, the specified isolation level +persists for the entire session. If the @code{GLOBAL} qualifier is used, +the specified isolation level is applied to all new connections from +this point forward. Note that the specified isolation level will not +change the behavior for existing connections including the connection +that exectues the @code{SET GLOBAL TRANSACTION ISOLATION LEVEL} +statement. @multitable @columnfractions .30 .70 -@item @strong{Option} @tab @strong{Meaning} -@item @code{--gemini-full-recovery} @tab Default. -@item @code{--gemini-no-recovery} @tab Turn off recovery logging. Not recommended. -@item @code{--gemini-lazy-commit} @tab Relaxes the flush log at commit rule. -@item @code{--gemini-unbuffered-io} @tab All database writes bypass OS cache. -@item @code{--skip-gemini} @tab Don't use Gemini. -@item @code{--O gemini_db_buffers=#} @tab Number of database buffers in database cache. -@item @code{--O gemini_connection_limit=#} @tab Maximum number of connections to Gemini. -@item @code{--O gemini_spin_retries=#} @tab Spin lock retries (optimization). -@item @code{--O gemini_io_threads=#} @tab Number of background I/O threads. 
-@item @code{--O gemini_lock_table_size=#} @tab Set the maximum number of locks. Default 4096. +@item @sc{Isolation Level} @tab @sc{Description} + +@item +@strong{READ UNCOMMITTED} +@tab Does not obtain any locks when reading rows. This means that if a +row is locked by another process in a transaction that has a more strict +isolation level, the @code{READ UNCOMMITTED} query will not wait until +the locks are released before reading the row. You will get an error if +attempt any updates while running at this isolation level. + +@item +@strong{READ COMMITTED} +@tab Locks the requested rows long enough to copy the row from the +database block to the client row buffer. If a @code{READ COMMITTED} +query finds that a row is locked exclusively by another process, it will +wait until either the row has been released, or the lock timeout value +has expired. + +@item +@strong{REPEATABLE READ} +@tab Locks all the rows needed to satisfy the query. These locks are +held until the transaction ends (commits or rolls back). If a +@code{REPEATABLE READ} query finds that a row is locked exclusively by +another process, it will wait until either the row has been released, or +the lock timeout value has expired. + +@item +@strong{SERIALIZABLE} +@tab Locks the table that contains the rows needed to satisfy the +query. This lock is held until the transaction ends (commits or rolls +back). If a @code{SERIALIZABLE} query finds that a row is exclusively +locked by another process, it will wait until either the row has been +released, or the lock timeout value has expired. +@end multitable + +The statements that get exclusive locks are @code{INSERT}, +@code{UPDATE}, @code{DELETE} and @code{SELECT ... FOR UPDATE}. Select +statements without the @code{FOR UPDATE} qualifier get shared locks +which allow other not ''for update'' select statements to read the same +rows but block anyone trying to update the row from accessing it. 
Rows +or tables with exclusive locks block all access to the row from other +transactions until the transaction ends. + +In general terms, the higher the Isolation level the more likelihood of +having concurrent locks and therefore lock conflicts. In such cases, +adjust the @code{-O gemini_lock_table_size} accordingly. + +@cindex GEMINI tables, row-level locking +@noindent +@strong{Row-Level Locking} + +@code{GEMINI} uses row locks, which allows high concurrency for requests +on the same table. + +In order to avoid lock table overflow, SQL statements that require +applying locks to a large number of rows should either be run at the +serializable isolation level or should be covered by a lock table +statement. + +Memory must be pre-allocated for the lock table. The mysqld server +startup option @code{-0 gemini_lock_table_size} can be used to adjust +the number of concurrent locks. + +@cindex GEMINI tables, limitations +@node GEMINI Limitations, , GEMINI Concepts, GEMINI Overview +@subsubsection GEMINI Limitations + +The following limitations are in effect for the current version of +@code{GEMINI}: + +@itemize @bullet +@item +@code{DROP DATABASE} does not work with @code{GEMINI} tables; instead, +drop all the tables in the database first, then drop the database. + +@item +Maximum number of @code{GEMINI} tables is 1012. + +@item +Maximum number of @code{GEMINI} files a server can manage is 1012. Each +table consumes one file; an additional file is consumed if the table has +any indexes defined on it. + +@item +Maximum size of BLOBs is 16MB. + +@item +@code{FULLTEXT} indexes are not supported with @code{GEMINI} tables. + +@item +There is no support for multi-component @code{AUTO_INCREMENT} fields +that provide alternating values at the component level. If you try to +create such a field, @code{GEMINI} will refuse. + +@item +@code{TEMPORARY TABLES} are not supported by @code{GEMINI}. The +statement @code{CREATE TEMPORARY TABLE ... 
TYPE=GEMINI} will generate +the response: @code{ERROR 1005: Can't create table '/tmp/#sqlxxxxx' +(errno: 0)}. + +@item +@code{FLUSH TABLES} has not been implemented with @code{GEMINI} tables. +@end itemize + +@cindex GEMINI tables, using +@node Using GEMINI Tables, , GEMINI Overview, GEMINI +@subsection Using GEMINI Tables + +This section explains the various startup options you can use with +@code{GEMINI} tables, how to backup @code{GEMINI} tables, some +performance considerations and sample configurations, and a brief +discussion of when to use @code{GEMINI} tables. + +Specifically, the topics covered in this section are: + +@itemize @bullet +@item +Startup Options +@item +Creating @code{GEMINI} Tables +@item +Backing Up @code{GEMINI} Tables +@item +Using Auto_Increment Columns With @code{GEMINI} Tables +@item +Performance Considerations +@item +Sample Configurations +@item +When To Use @code{GEMINI} Tables +@end itemize + +@cindex GEMINI tables, startup options +@menu +* Startup Options:: +* Creating GEMINI Tables:: +* Backing Up GEMINI Tables:: +* Restoring GEMINI Tables:: +* Using Auto_Increment Columns With GEMINI Tables:: +* Performance Considerations:: +* Sample Configurations:: +* When To Use GEMINI Tables:: +@end menu + +@node Startup Options, Creating GEMINI Tables, Using GEMINI Tables, Using GEMINI Tables +@subsubsection Startup Options + +The table below lists options to mysqld that can be used to change the +behavior of @code{GEMINI} tables. + +@multitable @columnfractions .40 .60 +@item @sc{Option} @tab @sc{Description} + +@item +@code{--default-table-type=gemini} +@tab Sets the default table handler to be @code{GEMINI}. All create +table statements will create @code{GEMINI} tables unless otherwise +specified with @code{TYPE=@var{table-type}}. As noted above, there is +currently a limitation with @code{TEMPORARY} tables using @code{GEMINI}. 
+ +@item +@code{--gemini-flush-log-at-commit} +@tab Forces the recovery log buffers to be flushed after every +commit. This can have a serious performance penalty, so use with +caution. + +@item +@code{--gemini-recovery=FULL | NONE | FORCE} +@tab Sets the recovery mode. Default is @code{FULL}. @code{NONE} is +useful for performing repeatable batch operations because the updates +are not recorded in the recovery log. @code{FORCE} skips crash recovery +upon startup; this corrupts the database, and should be used in +emergencies only. + +@item +@code{--gemini-unbuffered-io} +@tab All database writes bypass the OS cache. This can provide a +performance boost on heavily updated systems where most of the dataset +being worked on is cached in memory with the @code{gemini_buffer_cache} +parameter. + +@item +@code{--O gemini_buffer_cache=size} +@tab Amount of memory to allocate for database buffers, including Index +and Record information. It is recommended that this number be 10% of the +total size of all @code{GEMINI} tables. Do not exceed amount of memory +on the system! + +@item +@code{--O gemini_connection_limit=#} +@tab Maximum number of connections to @code{GEMINI}; default is +@code{100}. Each connection consumes about 1K of memory. + +@item +@code{--O gemini_io_threads=#} +@tab Number of background I/O threads; default is @code{2}. Increase the +number when using @code{--gemini-unbuffered-io} + +@item +@code{--O gemini_lock_table_size=#} +@tab Sets the maximum number of concurrent locks; default is 4096. Using +@code{SET [ GLOBAL | SESSION ] TRANSACTION ISOLATION = ...} will +determine how long a program will hold row locks. + +@item +@code{--O gemini_lock_wait_timeout=seconds} +@tab Number of seconds to wait for record locks when performing queries; +default is 10 seconds. Using @code{SET [ GLOBAL | SESSION ] TRANSACTION +ISOLATION = ...} will determine how long a program will hold row locks. + +@item +@code{--skip-gemini} +@tab Do not use @code{GEMINI}. 
If you use @code{--skip-gemini}, @strong{MySQL} +will not initialize the @code{GEMINI} table handler, saving memory; you +cannot use @code{GEMINI} tables if you use @code{--skip-gemini}. + +@item +@code{--transaction-isolation=READ-UNCOMMITTED | READ-COMMITTED | REPEATABLE-READ | SERIALIZABLE} +@tab Sets the GLOBAL transaction isolation level for all users that +connect to the server; can be overridden with the SET ISOLATION LEVEL +statement. @end multitable -If you use @code{--skip-gemini}, @strong{MySQL} will not initialize the -Gemini table handler, saving memory; you cannot use Gemini tables if you -use @code{--skip-gemini}. +@cindex GEMINI tables, creating +@node Creating GEMINI Tables, Backing Up GEMINI Tables, Startup Options, Using GEMINI Tables +@subsubsection Creating GEMINI Tables -@node GEMINI features, GEMINI TODO, GEMINI start, GEMINI -@subsection Features of @code{GEMINI} tables: +@code{GEMINI} tables can be created by either using the @code{CREATE +TABLE} syntax or the @code{ALTER TABLE} syntax. @itemize @bullet @item -If a query result can be resolved solely from the index key, Gemini will -not read the actual row stored in the database. +The syntax for creating a @code{GEMINI} table is: + +@example +CREATE TABLE @var{table-name} (....) TYPE=GEMINI; +@end example + +@item +The syntax to convert a table to @code{GEMINI} is: + +@example +ALTER TABLE @var{table-name} TYPE=GEMINI; +@end example +@end itemize + +@xref{Tutorial}, for more information on how to create and use +@code{MySQL} tables. + +@cindex GEMINI tables, backing up +@node Backing Up GEMINI Tables, Restoring GEMINI Tables, Creating GEMINI Tables, Using GEMINI Tables +@subsubsection Backing Up GEMINI Tables + +@code{GEMINI} supports both @code{BACKUP TABLE} and @code{RESTORE TABLE} +syntax. To learn more about how to use @code{BACKUP} and @code{RESTORE}, +see @ref{BACKUP TABLE} and @ref{RESTORE TABLE}. 
+ +To backup @code{GEMINI} tables outside of the @code{MySQL} environment, +you must first shut down the @code{MySQL} server. Once the server is +shut down, you can copy the files associated with @code{GEMINI} to a +different location. The files that make up the @code{GEMINI} table +handler are: + +@itemize @bullet +@item +All files associated with a table with a @code{.gmd} extention below the +@code{$DATADIR} directory. Such files include @code{@var{table}.gmd}, +@code{@var{table}.gmi}, and @code{@var{table}.frm} +@item +@code{gemini.db} in the @code{$DATADIR} directory +@item +@code{gemini.rl} in the @code{$DATADIR} directory +@item +@code{gemini.lg} in the @code{$DATADIR} directory +@end itemize + +All the @code{GEMINI} files must be copied together. You can not copy +just the @code{.gmi} and @code{.gmd} files to a different +@code{$DATADIR} and have them become part of a new database. You can +copy an entire @code{$DATADIR} directory to another location and start a +@strong{MySQL} server using the new @code{$DATADIR}. + +@cindex GEMINI tables, restoring +@node Restoring GEMINI Tables, Using Auto_Increment Columns With GEMINI Tables, Backing Up GEMINI Tables, Using GEMINI Tables +@subsubsection Restoring GEMINI Tables + +To restore @code{GEMINI} tables outside of the @code{MySQL} environment, +you must first shut down the @code{MySQL} server. Once the server is +shut down, you can remove all @code{GEMINI} files in the target +@code{$DATADIR} and then copy the files previously backed up into the +@code{$DATADIR} directory. + +As mentioned above, the files that make up the @code{GEMINI} table +handler are: + +@itemize @bullet +@item +All files associated with a table with a @code{.gmd} extention below the +@code{$DATADIR} directory. 
Such files include @code{@var{table}.gmd}, +@code{@var{table}.gmi}, and @code{@var{table}.frm} +@item +@code{gemini.db} in the @code{$DATADIR} directory +@item +@code{gemini.rl} in the @code{$DATADIR} directory +@item +@code{gemini.lg} in the @code{$DATADIR} directory +@end itemize + +When restoring a table, all the @code{GEMINI} files must be copied +together. You can not restore just the @code{.gmi} and @code{.gmd} +files. + +@cindex GEMINI tables, auto_increment +@node Using Auto_Increment Columns With GEMINI Tables, Performance Considerations, Restoring GEMINI Tables, Using GEMINI Tables +@subsubsection Using Auto_Increment Columns With GEMINI Tables + +As mentioned previously, @code{GEMINI} tables support row-level and +table-level locking to increase concurrency in applications and to allow +reading of tables without locking for maximum concurrency in heavy +update environments. This feature has several implications when working +with @code{auto_increment} tables. + +In @code{MySQL}, when a column is defined as an @code{auto_increment} +column, and a row is inserted into the table with a @code{NULL} for the +column, the @code{auto_increment} column is updated to be 1 higher than +the highest value in the column. + +With @code{MyISAM} tables, the @code{auto_increment} function is +implemented by looking in the index and finding the highest value and +adding 1 to it. This is possible because the entire @code{ISAM} table is +locked during the update period and the increment value is therefore +guaranteed to not be changing. + +With @code{GEMINI} tables, the @code{auto_increment} function is +implemented by maintaining a counter in a separate location from the +table data. Instead of looking at the highest value in the table index, +@code{GEMINI} tables look at this separately maintained counter. 
This +means that in a transactional model, unlike the bottleneck inherent in +the @code{MyISAM} approach, @code{GEMINI} users do @b{not} have to wait +until the transaction that added the last value either commits or +rollbacks before looking at the value. + +Two side-effects of the @code{GEMINI} implementation are: + +@itemize @bullet @item -Locking on Gemini tables is done at row level. +If an insert is done where the column with the @code{auto_increment} is +specified, and this specified value is the highest value, @code{MyISAM} +uses it as its @code{auto_increment} value, and every subsequent insert +is based on this. By contrast, @code{GEMINI} does not use this value, +but instead uses the value maintained in the separate @code{GEMINI} +counter location. + @item -@code{SELECT COUNT(*) FROM table_name} is fast; Gemini maintains a count -of the number of rows in the table. +To set the counter to a specific value, you can use @code{SET +insert_id=#} and insert a new row in the table. However, as a general +rule, values should not be inserted into an @code{auto_increment} +column; the database manager should be maintaining this field, not the +application. @code{SET insert_id} is a recovery mechanism that should be +used in case of error only. @end itemize -@node GEMINI TODO, , GEMINI features, GEMINI -@subsection Current limitations of @code{GEMINI} tables: +Note that if you delete the row containing the maximum value for an +@code{auto_increment} column, the value will be reused with a +@code{GEMINI} table but not with a @code{MyISAM} table. + +See @ref{CREATE TABLE} for more information about creating +@code{auto_increment} columns. 
+ +@cindex GEMINI tables, performance considerations +@node Performance Considerations, Sample Configurations, Using Auto_Increment Columns With GEMINI Tables, Using GEMINI Tables +@subsubsection Performance Considerations + +In addition to designing the best possible application, configuration of +the data and the server startup parameters need to be considered. How +the hardware is being used can have a dramatic effect on how fast the +system will respond to queries. Disk Drives and Memory must both be +considered. + +@noindent +@strong{Disk Drives} + +For best performance, you want to spread the data out over as many disks +as possible. Using RAID 10 striping works very well. If there are a lot of +updates then the recovery log (@code{gemini.rl}) should be on a +relatively quiet disk drive. + +To spread the data out without using RAID 10, you can do the following: @itemize @bullet @item -BLOB columns are not supported in @code{GEMINI} tables. +Group all the tables into three categories: Heavy Use, Moderate Use, +Light Use. + +@item +Take the number of disk drives available and use a round-robin approach +to the three categories grouping the tables on a disk drive. The result +will be an equal distribution of Heavy/Moderate/Light tables assigned to +each disk drive. + +@item +Once the tables have been converted to @code{GEMINI} by using the +@code{ALTER TABLE <name> TYPE=GEMINI} statements, move (@code{mv}) the +@code{.gmd} and @code{.gmi} files to a different disk drive and link +(@code{ln -s}) them back to the original directory where the @code{.frm} +file resides. + @item -The maximum number of concurrent users accessing @code{GEMINI} tables is -limited by @code{gemini_connection_limit}. The default is 100 users. +Finally, move the @code{gemini.rl} file to its quiet disk location and link +the file back to the @code{$DATADIR} directory. @end itemize -NuSphere is working on removing these limitations. 
+@noindent +@strong{Memory} + +The more data that can be placed in memory the faster the access to the +data. Figure out how large the @code{GEMINI} data is by adding up the +@code{.gmd} and @code{.gmi} file sizes. If you can, put at least 10% of +the data into memory. You allocate memory for the rows and indexes by +using the @code{gemini_buffer_cache} startup parameter. For example: + +@example +mysqld -O gemini_buffer_cache=800M +@end example + +@noindent +would allocate 800 MB of memory for the @code{GEMINI} buffer cache. + +@cindex GEMINI tables, sample configurations +@node Sample Configurations, When To Use GEMINI Tables, Performance Considerations, Using GEMINI Tables +@subsubsection Sample Configurations + +Based on the performance considerations above, we can look at some +examples for how to get the best performance out of the system when +using @code{GEMINI} tables. + +@multitable @columnfractions .30 .70 +@item @sc{Hardware} @tab @sc{Configuration} +@item +One CPU, 128MB memory, one disk drive +@tab Allocate 80MB of memory for reading and updating @code{GEMINI} +tables by starting the mysqld server with the following option: + +@example +-O gemini_buffer_cache=80M +@end example + +@item +Two CPUs, 512MB memory, four disk drives +@tab Use RAID 10 to stripe the data across all available disks, or use +the method described in the performance considerations section, +above. Allocate 450MB of memory for reading/updating @code{GEMINI} +tables: + +@example +-O gemini_buffer_cache=450M +@end example +@end multitable + +@cindex GEMINI tables, when to use +@node When To Use GEMINI Tables, , Sample Configurations, Using GEMINI Tables +@subsubsection When To Use GEMINI Tables + +Because the @code{GEMINI} table handler provides crash recovery and +transaction support, there is extra overhead that is not found in other +non-transaction safe table handlers. 
Here are some general guidelines +for when to employ @code{GEMINI} and when to use other non-transaction +safe tables (@code{NTST}). + +@multitable @columnfractions .30 .25 .45 +@item +@sc{Access Trends} @tab @sc{Table Type} @tab @sc{Reason} +@item +Read-only +@tab @code{NTST} +@tab Less overhead and faster +@item +Critical data +@tab @code{GEMINI} +@tab Crash recovery protection +@item +High concurrency +@tab @code{GEMINI} +@tab Row-level locking +@item +Heavy update +@tab @code{GEMINI} +@tab Row-level locking +@end multitable + +The table below shows how a typical application schema could be defined. + +@multitable @columnfractions .15 .30 .25 .30 +@item +@sc{Table} @tab @sc{Contents} @tab @sc{Table Type} @tab @sc{Reason} +@item +account +@tab Customer account data +@tab @code{GEMINI} +@tab Critical data, heavy update +@item +order +@tab Orders for a customer +@tab @code{GEMINI} +@tab Critical data, heavy update +@item +orderline +@tab Orderline detail for an order +@tab @code{GEMINI} +@tab Critical data, heavy update +@item +invdesc +@tab Inventory description +@tab @code{NTST} +@tab Read-only, frequent access +@item +salesrep +@tab Sales rep information +@tab @code{NTST} +@tab Infrequent update +@item +inventory +@tab Inventory information +@tab @code{GEMINI} +@tab High concurrency, critical data +@item +config +@tab System configuration +@tab @code{NTST} +@tab Read-only +@end multitable @node InnoDB, , GEMINI, Table types @section InnoDB Tables diff --git a/acinclude.m4 b/acinclude.m4 index ab2ea5cddd1..59b6e909225 100644 --- a/acinclude.m4 +++ b/acinclude.m4 @@ -999,10 +999,10 @@ dnl echo "DBG_GEM1: gemini='$gemini'" gemini_includes= gemini_libs= case "$gemini" in - no | default | *) + no) AC_MSG_RESULT([Not using Gemini DB]) ;; - yes ) + yes | default | *) have_gemini_db="yes" gemini_includes="-I../gemini/incl -I../gemini" gemini_libs="\ diff --git a/configure.in b/configure.in index 4e73bb901fa..38df0e950c4 100644 --- a/configure.in +++ b/configure.in @@ 
-2020,6 +2020,17 @@ EOF echo "END OF INNODB CONFIGURATION" fi + if test "X$have_gemini_db" = "Xyes"; then + sql_server_dirs="gemini $sql_server_dirs" + echo "CONFIGURING FOR GEMINI DB" + (cd gemini && sh ./configure) \ + || AC_MSG_ERROR([could not configure Gemini DB]) + + echo "END OF GEMINI DB CONFIGURATION" + + AC_DEFINE(HAVE_GEMINI_DB) + fi + if test "$with_posix_threads" = "no" -o "$with_mit_threads" = "yes" then # MIT user level threads diff --git a/include/my_base.h b/include/my_base.h index aee9f7af3f1..bb2e4128195 100644 --- a/include/my_base.h +++ b/include/my_base.h @@ -213,6 +213,7 @@ enum ha_base_keytype { #define HA_ERR_CRASHED_ON_USAGE 145 /* Table must be repaired */ #define HA_ERR_LOCK_WAIT_TIMEOUT 146 #define HA_ERR_LOCK_TABLE_FULL 147 +#define HA_ERR_READ_ONLY_TRANSACTION 148 /* Updates not allowed */ /* Other constants */ diff --git a/include/mysqld_error.h b/include/mysqld_error.h index 4f46c40ff49..e412f95a8e4 100644 --- a/include/mysqld_error.h +++ b/include/mysqld_error.h @@ -205,4 +205,7 @@ #define ER_SLAVE_THREAD 1202 #define ER_TOO_MANY_USER_CONNECTIONS 1203 #define ER_SET_CONSTANTS_ONLY 1204 -#define ER_ERROR_MESSAGES 205 +#define ER_LOCK_WAIT_TIMEOUT 1205 +#define ER_LOCK_TABLE_FULL 1206 +#define ER_READ_ONLY_TRANSACTION 1207 +#define ER_ERROR_MESSAGES 208 diff --git a/sql/field.cc b/sql/field.cc index 1f1f00b161b..629ae899494 100644 --- a/sql/field.cc +++ b/sql/field.cc @@ -4087,6 +4087,59 @@ const char *Field_blob::unpack(char *to, const char *from) } +#ifdef HAVE_GEMINI_DB +/* Blobs in Gemini tables are stored separately from the rows which contain +** them (except for tiny blobs, which are stored in the row). For all other +** blob types (blob, mediumblob, longblob), the row contains the length of +** the blob data and a blob id. These methods (pack_id, get_id, and +** unpack_id) handle packing and unpacking blob fields in Gemini rows. 
+*/ +char *Field_blob::pack_id(char *to, const char *from, ulonglong id, uint max_length) +{ + char *save=ptr; + ptr=(char*) from; + ulong length=get_length(); // Length of from string + if (length > max_length) + { + ptr=to; + length=max_length; + store_length(length); // Store max length + ptr=(char*) from; + } + else + memcpy(to,from,packlength); // Copy length + if (length) + { + int8store(to+packlength, id); + } + ptr=save; // Restore org row pointer + return to+packlength+sizeof(id); +} + + +ulonglong Field_blob::get_id(const char *from) +{ + ulonglong id = 0; + ulong length=get_length(from); + if (length) + longlongget(id, from+packlength); + return id; +} + + +const char *Field_blob::unpack_id(char *to, const char *from, const char *bdata) +{ + memcpy(to,from,packlength); + ulong length=get_length(from); + from+=packlength; + if (length) + memcpy_fixed(to+packlength, &bdata, sizeof(bdata)); + else + bzero(to+packlength,sizeof(bdata)); + return from+sizeof(ulonglong); +} +#endif /* HAVE_GEMINI_DB */ + /* Keys for blobs are like keys on varchars */ int Field_blob::pack_cmp(const char *a, const char *b, uint key_length) diff --git a/sql/field.h b/sql/field.h index 2f03d849c9b..b5d7c613701 100644 --- a/sql/field.h +++ b/sql/field.h @@ -869,6 +869,13 @@ public: } char *pack(char *to, const char *from, uint max_length= ~(uint) 0); const char *unpack(char *to, const char *from); +#ifdef HAVE_GEMINI_DB + char *pack_id(char *to, const char *from, ulonglong id, + uint max_length= ~(uint) 0); + ulonglong get_id(const char *from); + const char *unpack_id(char *to, const char *from, const char *bdata); + enum_field_types blobtype() { return (packlength == 1 ? 
FIELD_TYPE_TINY_BLOB : FIELD_TYPE_BLOB);} +#endif char *pack_key(char *to, const char *from, uint max_length); char *pack_key_from_key_image(char* to, const char *from, uint max_length); int pack_cmp(const char *a, const char *b, uint key_length); diff --git a/sql/ha_gemini.cc b/sql/ha_gemini.cc index 73241c60be7..733f0aa3a7d 100644 --- a/sql/ha_gemini.cc +++ b/sql/ha_gemini.cc @@ -19,10 +19,13 @@ #pragma implementation // gcc: Class implementation #endif -#include "mysql_priv.h" -#ifdef HAVE_GEMINI_DB +#include <string.h> +#include "mysql_priv.h" #include "my_pthread.h" + +#ifdef HAVE_GEMINI_DB +#include "ha_gemini.h" #include "dbconfig.h" #include "dsmpub.h" #include "recpub.h" @@ -34,7 +37,17 @@ #include <hash.h> #include <stdarg.h> #include "geminikey.h" -#include "ha_gemini.h" + +#define gemini_msg MSGD_CALLBACK + +pthread_mutex_t gem_mutex; + +static HASH gem_open_tables; +static GEM_SHARE *get_share(const char *table_name, TABLE *table); +static int free_share(GEM_SHARE *share, bool mutex_is_locked); +static byte* gem_get_key(GEM_SHARE *share,uint *length, + my_bool not_used __attribute__((unused))); +static void gemini_lock_table_overflow_error(dsmContext_t *pcontext); const char *ha_gemini_ext=".gmd"; const char *ha_gemini_idx_ext=".gmi"; @@ -48,6 +61,7 @@ long gemini_locktablesize; long gemini_lock_wait_timeout; long gemini_spin_retries; long gemini_connection_limit; +char *gemini_basedir; const char gemini_dbname[] = "gemini"; dsmContext_t *pfirstContext = NULL; @@ -61,7 +75,7 @@ TYPELIB gemini_recovery_typelib= {array_elements(gemini_recovery_names),"", const int start_of_name = 2; /* Name passed as ./<db>/<table-name> and we're not interested in the ./ */ -static const int keyBufSize = MYMAXKEYSIZE * 2; +static const int keyBufSize = MAXKEYSZ + FULLKEYHDRSZ + MAX_REF_PARTS + 16; static int gemini_tx_begin(THD *thd); static void print_msg(THD *thd, const char *table_name, const char *op_name, @@ -87,40 +101,56 @@ bool gemini_init(void) goto badret; } + 
/* dsmContextCreate and dsmContextSetString(DSM_TAGDB_DBNAME) must + ** be the first DSM calls we make so that we can log any errors which + ** occur in subsequent DSM calls. DO NOT INSERT ANY DSM CALLS IN + ** BETWEEN THIS COMMENT AND THE COMMENT THAT SAYS "END OF CODE..." + */ /* Gotta connect to the database regardless of the operation */ rc = dsmContextCreate(&pfirstContext); if( rc != 0 ) { - printf("dsmContextCreate failed %ld\n",rc); + gemini_msg(pfirstContext, "dsmContextCreate failed %l",rc); goto badret; } + /* This call will also open the log file */ rc = dsmContextSetString(pfirstContext, DSM_TAGDB_DBNAME, strlen(gemini_dbname), (TEXT *)gemini_dbname); if( rc != 0 ) { - printf("Dbname tag failed %ld\n", rc); + gemini_msg(pfirstContext, "Dbname tag failed %l", rc); goto badret; } + /* END OF CODE NOT TO MESS WITH */ fn_format(pmsgsfile, GEM_MSGS_FILE, language, ".db", 2 | 4); rc = dsmContextSetString(pfirstContext, DSM_TAGDB_MSGS_FILE, strlen(pmsgsfile), (TEXT *)pmsgsfile); if( rc != 0 ) { - printf("MSGS_DIR tag failed %ld\n", rc); + gemini_msg(pfirstContext, "MSGS_DIR tag failed %l", rc); + goto badret; + } + + strxmov(pmsgsfile, gemini_basedir, GEM_SYM_FILE, NullS); + rc = dsmContextSetString(pfirstContext, DSM_TAGDB_SYMFILE, + strlen(pmsgsfile), (TEXT *)pmsgsfile); + if( rc != 0 ) + { + gemini_msg(pfirstContext, "SYMFILE tag failed %l", rc); goto badret; } rc = dsmContextSetLong(pfirstContext,DSM_TAGDB_ACCESS_TYPE,DSM_ACCESS_STARTUP); if ( rc != 0 ) { - printf("ACCESS TAG set failed %ld\n",rc); + gemini_msg(pfirstContext, "ACCESS TAG set failed %l",rc); goto badret; } rc = dsmContextSetLong(pfirstContext,DSM_TAGDB_ACCESS_ENV, DSM_SQL_ENGINE); if( rc != 0 ) { - printf("ACCESS_ENV set failed %ld",rc); + gemini_msg(pfirstContext, "ACCESS_ENV set failed %l",rc); goto badret; } @@ -129,7 +159,7 @@ bool gemini_init(void) (TEXT *)mysql_real_data_home); if( rc != 0 ) { - printf("Datadir tag failed %ld\n", rc); + gemini_msg(pfirstContext, "Datadir tag failed 
%l", rc); goto badret; } @@ -137,7 +167,7 @@ bool gemini_init(void) gemini_connection_limit); if(rc != 0) { - printf("MAX_USERS tag set failed %ld",rc); + gemini_msg(pfirstContext, "MAX_USERS tag set failed %l",rc); goto badret; } @@ -145,7 +175,7 @@ bool gemini_init(void) gemini_lock_wait_timeout); if(rc != 0) { - printf("MAX_LOCK_ENTRIES tag set failed %ld",rc); + gemini_msg(pfirstContext, "MAX_LOCK_ENTRIES tag set failed %l",rc); goto badret; } @@ -153,7 +183,7 @@ bool gemini_init(void) gemini_locktablesize); if(rc != 0) { - printf("MAX_LOCK_ENTRIES tag set failed %ld",rc); + gemini_msg(pfirstContext, "MAX_LOCK_ENTRIES tag set failed %l",rc); goto badret; } @@ -161,7 +191,7 @@ bool gemini_init(void) gemini_spin_retries); if(rc != 0) { - printf("SPIN_AMOUNT tag set failed %ld",rc); + gemini_msg(pfirstContext, "SPIN_AMOUNT tag set failed %l",rc); goto badret; } @@ -172,22 +202,22 @@ bool gemini_init(void) gemini_buffer_cache); if(rc != 0) { - printf("DB_BUFFERS tag set failed %ld",rc); + gemini_msg(pfirstContext, "DB_BUFFERS tag set failed %l",rc); goto badret; } rc = dsmContextSetLong(pfirstContext, DSM_TAGDB_FLUSH_AT_COMMIT, - ((gemini_options & GEMOPT_FLUSH_LOG) ? 1 : 0)); + ((gemini_options & GEMOPT_FLUSH_LOG) ? 0 : 1)); if(rc != 0) { - printf("FLush_Log_At_Commit tag set failed %ld",rc); + gemini_msg(pfirstContext, "FLush_Log_At_Commit tag set failed %l",rc); goto badret; } rc = dsmContextSetLong(pfirstContext, DSM_TAGDB_DIRECT_IO, ((gemini_options & GEMOPT_UNBUFFERED_IO) ? 1 : 0)); if(rc != 0) { - printf("DIRECT_IO tag set failed %ld",rc); + gemini_msg(pfirstContext, "DIRECT_IO tag set failed %l",rc); goto badret; } @@ -195,10 +225,20 @@ bool gemini_init(void) ((gemini_recovery_options & GEMINI_RECOVERY_FULL) ? 
1 : 0)); if(rc != 0) { - printf("CRASH_PROTECTION tag set failed %ld",rc); + gemini_msg(pfirstContext, "CRASH_PROTECTION tag set failed %l",rc); goto badret; } + if (gemini_recovery_options & GEMINI_RECOVERY_FORCE) + { + rc = dsmContextSetLong(pfirstContext, DSM_TAGDB_FORCE_ACCESS, 1); + if(rc != 0) + { + printf("CRASH_PROTECTION tag set failed %ld",rc); + goto badret; + } + } + /* cluster size will come in bytes, need to convert it to 16 K units. */ gemini_log_cluster_size = (gemini_log_cluster_size + 16383) / 16384; @@ -207,7 +247,7 @@ bool gemini_init(void) if(rc != 0) { - printf("CRASH_PROTECTION tag set failed %ld",rc); + gemini_msg(pfirstContext, "CRASH_PROTECTION tag set failed %l",rc); goto badret; } @@ -215,12 +255,20 @@ bool gemini_init(void) DSM_DB_OPENDB | DSM_DB_OPENFILE); if( rc != 0 ) { - printf("dsmUserConnect failed rc = %ld\n",rc); + /* Message is output in dbenv() */ goto badret; } /* Set access to shared for subsequent user connects */ rc = dsmContextSetLong(pfirstContext,DSM_TAGDB_ACCESS_TYPE,DSM_ACCESS_SHARED); + rc = gemini_helper_threads(pfirstContext); + + + (void) hash_init(&gem_open_tables,32,0,0, + (hash_get_key) gem_get_key,0,0); + pthread_mutex_init(&gem_mutex,NULL); + + DBUG_RETURN(0); badret: @@ -231,30 +279,40 @@ badret: static int gemini_helper_threads(dsmContext_t *pContext) { int rc = 0; + int i; + pthread_attr_t thr_attr; + pthread_t hThread; DBUG_ENTER("gemini_helper_threads"); - rc = pthread_create (&hThread, 0, gemini_watchdog, (void *)pContext); + + (void) pthread_attr_init(&thr_attr); +#if !defined(HAVE_DEC_3_2_THREADS) + pthread_attr_setscope(&thr_attr,PTHREAD_SCOPE_SYSTEM); + (void) pthread_attr_setdetachstate(&thr_attr,PTHREAD_CREATE_DETACHED); + pthread_attr_setstacksize(&thr_attr,32768); +#endif + rc = pthread_create (&hThread, &thr_attr, gemini_watchdog, (void *)pContext); if (rc) { - printf("Can't create gemini watchdog thread"); + gemini_msg(pContext, "Can't Create gemini watchdog thread"); goto done; } 
if(!gemini_io_threads) goto done; - rc = pthread_create(&hThread, 0, gemini_rl_writer, (void *)pContext); + rc = pthread_create(&hThread, &thr_attr, gemini_rl_writer, (void *)pContext); if(rc) { - printf("Can't create gemini recovery log writer thread"); + gemini_msg(pContext, "Can't create Gemini recovery log writer thread"); goto done; } - for( int i = gemini_io_threads - 1;i;i--) + for(i = gemini_io_threads - 1;i;i--) { - rc = pthread_create(&hThread, 0, gemini_apw, (void *)pContext); + rc = pthread_create(&hThread, &thr_attr, gemini_apw, (void *)pContext); if(rc) { - printf("Can't create gemini page writer thread"); + gemini_msg(pContext, "Can't create Gemini database page writer thread"); goto done; } } @@ -273,7 +331,7 @@ pthread_handler_decl(gemini_watchdog,arg ) rc = dsmContextCopy(pcontext,&pmyContext, DSMCONTEXTDB); if( rc != 0 ) { - printf("dsmContextCopy failed for watchdog %d\n",rc); + gemini_msg(pcontext, "dsmContextCopy failed for Gemini watchdog %d",rc); return 0; } @@ -281,7 +339,7 @@ pthread_handler_decl(gemini_watchdog,arg ) if( rc != 0 ) { - printf("dsmUserConnect failed for watchdog %d\n",rc); + gemini_msg(pcontext, "dsmUserConnect failed for Gemini watchdog %d",rc); return 0; } @@ -311,7 +369,7 @@ pthread_handler_decl(gemini_rl_writer,arg ) rc = dsmContextCopy(pcontext,&pmyContext, DSMCONTEXTDB); if( rc != 0 ) { - printf("dsmContextCopy failed for recovery log writer %d\n",rc); + gemini_msg(pcontext, "dsmContextCopy failed for Gemini recovery log writer %d",rc); return 0; } @@ -319,7 +377,7 @@ pthread_handler_decl(gemini_rl_writer,arg ) if( rc != 0 ) { - printf("dsmUserConnect failed for recovery log writer %d\n",rc); + gemini_msg(pcontext, "dsmUserConnect failed for Gemini recovery log writer %d",rc); return 0; } @@ -348,7 +406,7 @@ pthread_handler_decl(gemini_apw,arg ) rc = dsmContextCopy(pcontext,&pmyContext, DSMCONTEXTDB); if( rc != 0 ) { - printf("dsmContextCopy failed for gemini page writer %d\n",rc); + gemini_msg(pcontext, 
"dsmContextCopy failed for Gemini page writer %d",rc); my_thread_end(); return 0; } @@ -356,7 +414,7 @@ pthread_handler_decl(gemini_apw,arg ) if( rc != 0 ) { - printf("dsmUserConnect failed for gemini page writer %d\n",rc); + gemini_msg(pcontext, "dsmUserConnect failed for Gemini page writer %d",rc); my_thread_end(); return 0; } @@ -388,7 +446,7 @@ int gemini_set_option_long(int optid, long optval) } if (rc) { - printf("SPIN_AMOUNT tag set failed %ld",rc); + gemini_msg(pfirstContext, "SPIN_AMOUNT tag set failed %l",rc); } else { @@ -410,7 +468,7 @@ static int gemini_connect(THD *thd) DSMCONTEXTDB); if( rc != 0 ) { - printf("dsmContextCopy failed %ld\n",rc); + gemini_msg(pfirstContext, "dsmContextCopy failed %l",rc); return(rc); } @@ -418,7 +476,7 @@ static int gemini_connect(THD *thd) if( rc != 0 ) { - printf("dsmUserConnect failed %ld\n",rc); + gemini_msg(pfirstContext, "dsmUserConnect failed %l",rc); return(rc); } @@ -444,6 +502,9 @@ bool gemini_end(void) THD *thd; DBUG_ENTER("gemini_end"); + + hash_free(&gem_open_tables); + pthread_mutex_destroy(&gem_mutex); if(pfirstContext) { rc = dsmShutdownSet(pfirstContext, DSM_SHUTDOWN_NORMAL); @@ -534,6 +595,24 @@ int gemini_rollback_to_savepoint(THD *thd) DBUG_RETURN(rc); } +int gemini_recovery_logging(THD *thd, bool on) +{ + int error; + int noLogging; + + if(!thd->gemini.context) + return 0; + + if(on) + noLogging = 0; + else + noLogging = 1; + + error = dsmContextSetLong((dsmContext_t *)thd->gemini.context, + DSM_TAGCONTEXT_NO_LOGGING,noLogging); + return error; +} + /* gemDataType - translates from mysql data type constant to gemini key services data type contstant */ int gemDataType ( int mysqlType ) @@ -599,8 +678,13 @@ int ha_gemini::open(const char *name, int mode, uint test_if_locked) DBUG_ENTER("ha_gemini::open"); thd = current_thd; - thr_lock_init(&alock); - thr_lock_data_init(&alock,&lock,(void*)0); + /* Init shared structure */ + if (!(share=get_share(name,table))) + { + DBUG_RETURN(1); /* purecov: inspected 
*/ + } + thr_lock_data_init(&share->lock,&lock,(void*) 0); + ref_length = sizeof(dsmRecid_t); if(thd->gemini.context == NULL) @@ -610,7 +694,7 @@ int ha_gemini::open(const char *name, int mode, uint test_if_locked) if(rc) return rc; } - if (!(rec_buff=my_malloc(table->rec_buff_length, + if (!(rec_buff=(byte*)my_malloc(table->rec_buff_length, MYF(MY_WME)))) { DBUG_RETURN(1); @@ -635,6 +719,12 @@ int ha_gemini::open(const char *name, int mode, uint test_if_locked) rc = dsmObjectNameToNum((dsmContext_t *)thd->gemini.context, (dsmText_t *)name_buff, &tableId); + if (rc) + { + gemini_msg((dsmContext_t *)thd->gemini.context, + "Unable to find table number for %s", name_buff); + DBUG_RETURN(rc); + } } tableNumber = tableId; @@ -649,8 +739,33 @@ int ha_gemini::open(const char *name, int mode, uint test_if_locked) crashed while being in the midst of a repair operation */ rc = dsmTableStatus((dsmContext_t *)thd->gemini.context, tableNumber,&tableStatus); - if(tableStatus) + if(tableStatus == DSM_OBJECT_IN_REPAIR) tableStatus = HA_ERR_CRASHED; + + pthread_mutex_lock(&share->mutex); + share->use_count++; + pthread_mutex_unlock(&share->mutex); + + if (table->blob_fields) + { + /* Allocate room for the blob ids from an unpacked row. Note that + ** we may not actually need all of this space because tiny blobs + ** are stored in the packed row, not in a separate storage object + ** like larger blobs. But we allocate an entry for all blobs to + ** keep the code simpler. 
+ */ + pBlobDescs = (gemBlobDesc_t *)my_malloc( + table->blob_fields * sizeof(gemBlobDesc_t), + MYF(MY_WME | MY_ZEROFILL)); + } + else + { + pBlobDescs = 0; + } + + get_index_stats(thd); + info(HA_STATUS_CONST); + DBUG_RETURN (rc); } @@ -680,6 +795,12 @@ int ha_gemini::index_open(char *tableName) rc = dsmObjectNameToNum((dsmContext_t *)thd->gemini.context, (dsmText_t *)tableName, &objectNumber); + if (rc) + { + gemini_msg((dsmContext_t *)thd->gemini.context, + "Unable to file Index number for %s", tableName); + DBUG_RETURN(rc); + } pindexNumbers[i] = objectNumber; } } @@ -692,12 +813,22 @@ int ha_gemini::index_open(char *tableName) int ha_gemini::close(void) { DBUG_ENTER("ha_gemini::close"); - thr_lock_delete(&alock); - my_free(rec_buff,MYF(MY_ALLOW_ZERO_PTR)); + my_free((char*)rec_buff,MYF(MY_ALLOW_ZERO_PTR)); rec_buff = 0; my_free((char *)pindexNumbers,MYF(MY_ALLOW_ZERO_PTR)); pindexNumbers = 0; - DBUG_RETURN(0); + + if (pBlobDescs) + { + for (uint i = 0; i < table->blob_fields; i++) + { + my_free((char*)pBlobDescs[i].pBlob, MYF(MY_ALLOW_ZERO_PTR)); + } + my_free((char *)pBlobDescs, MYF(0)); + pBlobDescs = 0; + } + + DBUG_RETURN(free_share(share, 0)); } @@ -709,7 +840,7 @@ int ha_gemini::write_row(byte * record) DBUG_ENTER("write_row"); - if(tableStatus) + if(tableStatus == HA_ERR_CRASHED) DBUG_RETURN(tableStatus); thd = current_thd; @@ -737,10 +868,11 @@ int ha_gemini::write_row(byte * record) /* A set insert-id statement so set the auto-increment value if this value is higher than it's current value */ error = dsmTableAutoIncrement((dsmContext_t *)thd->gemini.context, - tableNumber, (ULONG64 *)&nr); + tableNumber, (ULONG64 *)&nr,1); if(thd->next_insert_id > nr) { - error = dsmTableAutoIncrementSet((dsmContext_t *)thd->gemini.context,tableNumber, + error = dsmTableAutoIncrementSet((dsmContext_t *)thd->gemini.context, + tableNumber, (ULONG64)thd->next_insert_id); } } @@ -749,11 +881,13 @@ int ha_gemini::write_row(byte * record) } dsmRecord.table = tableNumber; - 
dsmRecord.maxLength = table->reclength; + dsmRecord.maxLength = table->rec_buff_length; if ((error=pack_row((byte **)&dsmRecord.pbuffer, (int *)&dsmRecord.recLength, - record))) + record, FALSE))) + { DBUG_RETURN(error); + } error = dsmRecordCreate((dsmContext_t *)thd->gemini.context, &dsmRecord,0); @@ -769,6 +903,8 @@ int ha_gemini::write_row(byte * record) thd->gemini.needSavepoint = 1; } } + if(error == DSM_S_RQSTREJ) + error = HA_ERR_LOCK_WAIT_TIMEOUT; DBUG_RETURN(error); } @@ -777,10 +913,17 @@ longlong ha_gemini::get_auto_increment() { longlong nr; int error; + int update; THD *thd=current_thd; + if(thd->lex.sql_command == SQLCOM_SHOW_TABLES) + update = 0; + else + update = 1; + error = dsmTableAutoIncrement((dsmContext_t *)thd->gemini.context, - tableNumber, (ULONG64 *)&nr); + tableNumber, (ULONG64 *)&nr, + update); return nr; } @@ -828,8 +971,8 @@ int ha_gemini::handleIndexEntry(const byte * record, dsmRecid_t recid, expects that the three lead bytes of the header are not counted in this length -- But cxKeyPrepare also expects that these three bytes are present in the keystr */ - theKey.akey.keyLen = (COUNT)keyStringLen - 3; - theKey.akey.unknown_comp = thereIsAnull; + theKey.akey.keyLen = (COUNT)keyStringLen - FULLKEYHDRSZ; + theKey.akey.unknown_comp = (dsmBoolean_t)thereIsAnull; theKey.akey.word_index = 0; theKey.akey.descending_key =0; if(option == KEY_CREATE) @@ -880,6 +1023,7 @@ int ha_gemini::createKeyString(const byte * record, KEY *pkeyinfo, int componentLen; int fieldType; int isNull; + uint key_part_length; KEY_PART_INFO *key_part; @@ -892,21 +1036,35 @@ int ha_gemini::createKeyString(const byte * record, KEY *pkeyinfo, unsigned char *pos; key_part = pkeyinfo->key_part + i; + key_part_length = key_part->length; fieldType = gemDataType(key_part->field->type()); - if(fieldType == GEM_CHAR) + switch (fieldType) { + case GEM_CHAR: + { /* Save the current ptr to the field in case we're building a key to remove an old key value when an indexed character 
column gets updated. */ char *ptr = key_part->field->ptr; key_part->field->ptr = (char *)record + key_part->offset; - key_part->field->sort_string(rec_buff, key_part->length); + key_part->field->sort_string((char*)rec_buff, key_part->length); key_part->field->ptr = ptr; pos = (unsigned char *)rec_buff; - } - else - { + } + break; + + case GEM_TINYBLOB: + case GEM_BLOB: + case GEM_MEDIUMBLOB: + case GEM_LONGBLOB: + ((Field_blob*)key_part->field)->get_ptr((char**)&pos); + key_part_length = ((Field_blob*)key_part->field)->get_length( + (char*)record + key_part->offset); + break; + + default: pos = (unsigned char *)record + key_part->offset; + break; } isNull = record[key_part->null_offset] & key_part->null_bit; @@ -914,7 +1072,7 @@ int ha_gemini::createKeyString(const byte * record, KEY *pkeyinfo, *thereIsAnull = true; rc = gemFieldToIdxComponent(pos, - (unsigned long) key_part->length, + (unsigned long) key_part_length, fieldType, isNull , key_part->field->flags & UNSIGNED_FLAG, @@ -951,7 +1109,7 @@ int ha_gemini::update_row(const byte * old_record, byte * new_record) } for (uint keynr=0 ; keynr < table->keys ; keynr++) { - if(key_cmp(keynr,old_record, new_record)) + if(key_cmp(keynr,old_record, new_record,false)) { error = handleIndexEntry(old_record,lastRowid,KEY_DELETE,keynr); if(error) @@ -973,10 +1131,10 @@ int ha_gemini::update_row(const byte * old_record, byte * new_record) dsmRecord.table = tableNumber; dsmRecord.recid = lastRowid; - dsmRecord.maxLength = table->reclength; + dsmRecord.maxLength = table->rec_buff_length; if ((error=pack_row((byte **)&dsmRecord.pbuffer, (int *)&dsmRecord.recLength, - new_record))) + new_record, TRUE))) { DBUG_RETURN(error); } @@ -992,6 +1150,7 @@ int ha_gemini::delete_row(const byte * record) int error = 0; dsmRecord_t dsmRecord; THD *thd = current_thd; + dsmContext_t *pcontext = (dsmContext_t *)thd->gemini.context; DBUG_ENTER("delete_row"); statistic_increment(ha_delete_count,&LOCK_status); @@ -999,9 +1158,7 @@ int 
ha_gemini::delete_row(const byte * record) if(thd->gemini.needSavepoint) { thd->gemini.savepoint++; - error = dsmTransaction((dsmContext_t *)thd->gemini.context, - &thd->gemini.savepoint, - DSMTXN_SAVE, 0, 0); + error = dsmTransaction(pcontext, &thd->gemini.savepoint, DSMTXN_SAVE, 0, 0); if (error) DBUG_RETURN(error); thd->gemini.needSavepoint = 0; @@ -1013,8 +1170,27 @@ int ha_gemini::delete_row(const byte * record) error = handleIndexEntries(record, dsmRecord.recid,KEY_DELETE); if(!error) { - error = dsmRecordDelete((dsmContext_t *)thd->gemini.context, - &dsmRecord, 0, NULL); + error = dsmRecordDelete(pcontext, &dsmRecord, 0, NULL); + } + + /* Delete any blobs associated with this row */ + if (table->blob_fields) + { + dsmBlob_t gemBlob; + + gemBlob.areaType = DSMOBJECT_BLOB; + gemBlob.blobObjNo = tableNumber; + for (uint i = 0; i < table->blob_fields; i++) + { + if (pBlobDescs[i].blobId) + { + gemBlob.blobId = pBlobDescs[i].blobId; + my_free((char *)pBlobDescs[i].pBlob, MYF(MY_ALLOW_ZERO_PTR)); + dsmBlobStart(pcontext, &gemBlob); + dsmBlobDelete(pcontext, &gemBlob, NULL); + /* according to DSM doc, no need to call dsmBlobEnd() */ + } + } } DBUG_RETURN(error); @@ -1023,7 +1199,6 @@ int ha_gemini::delete_row(const byte * record) int ha_gemini::index_init(uint keynr) { int error = 0; - int keyStringLen; THD *thd; DBUG_ENTER("index_init"); thd = current_thd; @@ -1046,19 +1221,9 @@ int ha_gemini::index_init(uint keynr) } pbracketBase->index = 0; pbracketLimit->index = (dsmIndex_t)pindexNumbers[keynr]; - pbracketLimit->keycomps = 1; - keyStringLen = 0; - error = gemKeyHigh(pbracketLimit->keystr, &keyStringLen, - pbracketLimit->index); - - /* We have to subtract three here since cxKeyPrepare - expects that the three lead bytes of the header are - not counted in this length -- But cxKeyPrepare also - expects that these three bytes are present in the keystr */ - pbracketLimit->keyLen = (COUNT)keyStringLen - 3; - pbracketBase->descending_key = 
pbracketLimit->descending_key = 0; pbracketBase->ksubstr = pbracketLimit->ksubstr = 0; + pbracketLimit->keycomps = pbracketBase->keycomps = 1; pfoundKey = (dsmKey_t *)my_malloc(sizeof(dsmKey_t) + keyBufSize,MYF(MY_WME)); if(!pfoundKey) @@ -1130,6 +1295,7 @@ int ha_gemini::pack_key( uint keynr, dsmKey_t *pkey, { uint offset=0; unsigned char *pos; + uint key_part_length = key_part->length; int fieldType; if (key_part->null_bit) @@ -1141,7 +1307,7 @@ int ha_gemini::pack_key( uint keynr, dsmKey_t *pkey, key_ptr+= key_part->store_length; rc = gemFieldToIdxComponent( (unsigned char *)key_ptr + offset, - (unsigned long) key_part->length, + (unsigned long) key_part_length, 0, 1 , /* Tells it to build a null component */ key_part->field->flags & UNSIGNED_FLAG, @@ -1153,20 +1319,31 @@ int ha_gemini::pack_key( uint keynr, dsmKey_t *pkey, } } fieldType = gemDataType(key_part->field->type()); - if(fieldType == GEM_CHAR) + switch (fieldType) { - key_part->field->store(key_ptr + offset, key_part->length); - key_part->field->sort_string(rec_buff, key_part->length); + case GEM_CHAR: + key_part->field->store((char*)key_ptr + offset, key_part->length); + key_part->field->sort_string((char*)rec_buff, key_part->length); pos = (unsigned char *)rec_buff; - } - else - { + break; + + case GEM_TINYBLOB: + case GEM_BLOB: + case GEM_MEDIUMBLOB: + case GEM_LONGBLOB: + ((Field_blob*)key_part->field)->get_ptr((char**)&pos); + key_part_length = ((Field_blob*)key_part->field)->get_length( + (char*)key_ptr + offset); + break; + + default: pos = (unsigned char *)key_ptr + offset; + break; } rc = gemFieldToIdxComponent( pos, - (unsigned long) key_part->length, + (unsigned long) key_part_length, fieldType, 0 , key_part->field->flags & UNSIGNED_FLAG, @@ -1189,7 +1366,7 @@ void ha_gemini::unpack_key(char *record, dsmKey_t *key, uint index) int fieldIsNull, fieldType; int rc = 0; - char unsigned *pos= &key->keystr[7]; + char unsigned *pos= &key->keystr[FULLKEYHDRSZ+4/* 4 for the index number*/]; for ( ; 
key_part != end; key_part++) { @@ -1202,7 +1379,8 @@ void ha_gemini::unpack_key(char *record, dsmKey_t *key, uint index) } rc = gemIdxComponentToField(pos, fieldType, (unsigned char *)record + key_part->field->offset(), - key_part->field->field_length, + //key_part->field->field_length, + key_part->length, key_part->field->decimals(), &fieldIsNull); if(fieldIsNull) @@ -1266,12 +1444,12 @@ int ha_gemini::index_read(byte * buf, const byte * key, pbracketLimit->keyLen = componentLen; } - /* We have to subtract three here since cxKeyPrepare + /* We have to subtract the header size here since cxKeyPrepare expects that the three lead bytes of the header are not counted in this length -- But cxKeyPrepare also expects that these three bytes are present in the keystr */ - pbracketBase->keyLen -= 3; - pbracketLimit->keyLen -= 3; + pbracketBase->keyLen -= FULLKEYHDRSZ; + pbracketLimit->keyLen -= FULLKEYHDRSZ; thd = current_thd; @@ -1294,7 +1472,7 @@ int ha_gemini::index_next(byte * buf) dsmMask_t findMode; DBUG_ENTER("index_next"); - if(tableStatus) + if(tableStatus == HA_ERR_CRASHED) DBUG_RETURN(tableStatus); thd = current_thd; @@ -1304,9 +1482,12 @@ int ha_gemini::index_next(byte * buf) error = gemKeyLow(pbracketBase->keystr, &keyStringLen, pbracketLimit->index); - pbracketBase->keyLen = (COUNT)keyStringLen - 3; + pbracketBase->keyLen = (COUNT)keyStringLen - FULLKEYHDRSZ; pbracketBase->index = pbracketLimit->index; - pbracketBase->keycomps = 1; + error = gemKeyHigh(pbracketLimit->keystr, &keyStringLen, + pbracketLimit->index); + pbracketLimit->keyLen = (COUNT)keyStringLen - FULLKEYHDRSZ; + findMode = DSMFINDFIRST; } else @@ -1369,24 +1550,20 @@ int ha_gemini::index_last(byte * buf) error = gemKeyLow(pbracketBase->keystr, &keyStringLen, pbracketLimit->index); - if(error) - goto errorReturn; - pbracketBase->keyLen = (COUNT)keyStringLen - 3; + pbracketBase->keyLen = (COUNT)keyStringLen - FULLKEYHDRSZ; pbracketBase->index = pbracketLimit->index; - pbracketBase->keycomps = 1; + 
error = gemKeyHigh(pbracketLimit->keystr, &keyStringLen, + pbracketLimit->index); + pbracketLimit->keyLen = (COUNT)keyStringLen - FULLKEYHDRSZ; error = findRow(thd,DSMFINDLAST,buf); -errorReturn: if (error == DSM_S_ENDLOOP) error = HA_ERR_END_OF_FILE; table->status = error ? STATUS_NOT_FOUND : 0; DBUG_RETURN(error); - - table->status = error ? STATUS_NOT_FOUND : 0; - DBUG_RETURN(error); } int ha_gemini::rnd_init(bool scan) @@ -1414,7 +1591,7 @@ int ha_gemini::rnd_next(byte *buf) DBUG_ENTER("rnd_next"); - if(tableStatus) + if(tableStatus == HA_ERR_CRASHED) DBUG_RETURN(tableStatus); thd = current_thd; @@ -1429,7 +1606,7 @@ int ha_gemini::rnd_next(byte *buf) dsmRecord.recid = lastRowid; dsmRecord.pbuffer = (dsmBuffer_t *)rec_buff; dsmRecord.recLength = table->reclength; - dsmRecord.maxLength = table->reclength; + dsmRecord.maxLength = table->rec_buff_length; error = dsmTableScan((dsmContext_t *)thd->gemini.context, &dsmRecord, DSMFINDNEXT, lockMode, 0); @@ -1437,17 +1614,23 @@ int ha_gemini::rnd_next(byte *buf) if(!error) { lastRowid = dsmRecord.recid; - unpack_row((char *)buf,(char *)dsmRecord.pbuffer); + error = unpack_row((char *)buf,(char *)dsmRecord.pbuffer); } if(!error) ; - else if (error == DSM_S_ENDLOOP) - error = HA_ERR_END_OF_FILE; - else if (error == DSM_S_RQSTREJ) - error = HA_ERR_LOCK_WAIT_TIMEOUT; - else if (error == DSM_S_LKTBFULL) - error = HA_ERR_LOCK_TABLE_FULL; - + else + { + lastRowid = 0; + if (error == DSM_S_ENDLOOP) + error = HA_ERR_END_OF_FILE; + else if (error == DSM_S_RQSTREJ) + error = HA_ERR_LOCK_WAIT_TIMEOUT; + else if (error == DSM_S_LKTBFULL) + { + error = HA_ERR_LOCK_TABLE_FULL; + gemini_lock_table_overflow_error((dsmContext_t *)thd->gemini.context); + } + } table->status = error ? 
STATUS_NOT_FOUND : 0; DBUG_RETURN(error); } @@ -1500,14 +1683,14 @@ int ha_gemini::fetch_row(void *gemini_context,const byte *buf) dsmRecord.recid = lastRowid; dsmRecord.pbuffer = (dsmBuffer_t *)rec_buff; dsmRecord.recLength = table->reclength; - dsmRecord.maxLength = table->reclength; + dsmRecord.maxLength = table->rec_buff_length; rc = dsmRecordGet((dsmContext_t *)gemini_context, &dsmRecord, 0); if(!rc) { - unpack_row((char *)buf,(char *)dsmRecord.pbuffer); + rc = unpack_row((char *)buf,(char *)dsmRecord.pbuffer); } DBUG_RETURN(rc); @@ -1544,7 +1727,7 @@ int ha_gemini::findRow(THD *thd, dsmMask_t findMode, byte *buf) if(key_read) { - unpack_key(buf, pkey, active_index); + unpack_key((char*)buf, pkey, active_index); } if(!key_read) /* unpack_key may have turned off key_read */ { @@ -1554,10 +1737,17 @@ int ha_gemini::findRow(THD *thd, dsmMask_t findMode, byte *buf) errorReturn: if(!rc) ; - else if(rc == DSM_S_RQSTREJ) - rc = HA_ERR_LOCK_WAIT_TIMEOUT; - else if (rc == DSM_S_LKTBFULL) - rc = HA_ERR_LOCK_TABLE_FULL; + else + { + lastRowid = 0; + if(rc == DSM_S_RQSTREJ) + rc = HA_ERR_LOCK_WAIT_TIMEOUT; + else if (rc == DSM_S_LKTBFULL) + { + rc = HA_ERR_LOCK_TABLE_FULL; + gemini_lock_table_overflow_error((dsmContext_t *)thd->gemini.context); + } + } DBUG_RETURN(rc); } @@ -1578,25 +1768,47 @@ void ha_gemini::info(uint flag) dsmStatus_t error; ULONG64 rows; + if(thd->gemini.context == NULL) + { + /* Need to get this thread a connection into the database */ + error = gemini_connect(thd); + if(error) + DBUG_VOID_RETURN; + } + error = dsmRowCount((dsmContext_t *)thd->gemini.context,tableNumber,&rows); records = (ha_rows)rows; deleted = 0; } - else if ((flag & HA_STATUS_CONST)) + if ((flag & HA_STATUS_CONST)) { - ; + ha_rows *rec_per_key = share->rec_per_key; + for (uint i = 0; i < table->keys; i++) + for(uint k=0; + k < table->key_info[i].key_parts; k++,rec_per_key++) + table->key_info[i].rec_per_key[k] = *rec_per_key; } - else if ((flag & HA_STATUS_ERRKEY)) + if ((flag & 
HA_STATUS_ERRKEY)) { errkey=last_dup_key; } - else if ((flag & HA_STATUS_TIME)) + if ((flag & HA_STATUS_TIME)) { ; } - else if ((flag & HA_STATUS_AUTO)) + if ((flag & HA_STATUS_AUTO)) { - ; + THD *thd = current_thd; + dsmStatus_t error; + + error = dsmTableAutoIncrement((dsmContext_t *)thd->gemini.context, + tableNumber, + (ULONG64 *)&auto_increment_value, + 0); + /* Should return the next auto-increment value that + will be given -- so we need to increment the one dsm + currently reports. */ + auto_increment_value++; } DBUG_VOID_RETURN; @@ -1658,7 +1870,22 @@ int ha_gemini::external_lock(THD *thd, int lock_type) thd->gemini.lock_count = 1; thd->gemini.tx_isolation = thd->tx_isolation; } - + // lockMode has already been set in store_lock + // If the statement about to be executed calls for + // exclusive locks and we're running at read uncommitted + // isolation level then raise an error. + if(thd->gemini.tx_isolation == ISO_READ_UNCOMMITTED) + { + if(lockMode == DSM_LK_EXCL) + { + DBUG_RETURN(HA_ERR_READ_ONLY_TRANSACTION); + } + else + { + lockMode = DSM_LK_NOLOCK; + } + } + if(thd->gemini.context == NULL) { /* Need to get this thread a connection into the database */ @@ -1678,6 +1905,8 @@ int ha_gemini::external_lock(THD *thd, int lock_type) rc = dsmObjectLock((dsmContext_t *)thd->gemini.context, (dsmObject_t)tableNumber,DSMOBJECT_TABLE,0, lockMode, 1, 0); + if(rc == DSM_S_RQSTREJ) + rc = HA_ERR_LOCK_WAIT_TIMEOUT; } } else /* lock_type == F_UNLK */ @@ -1703,18 +1932,24 @@ THR_LOCK_DATA **ha_gemini::store_lock(THD *thd, THR_LOCK_DATA **to, !thd->in_lock_tables) lock_type = TL_WRITE_ALLOW_WRITE; lock.type=lock_type; - - if(thd->gemini.tx_isolation == ISO_READ_UNCOMMITTED) - lockMode = DSM_LK_NOLOCK; - else if(table->reginfo.lock_type > TL_WRITE_ALLOW_READ) - lockMode = DSM_LK_EXCL; - else - lockMode = DSM_LK_SHARE; } + if(table->reginfo.lock_type > TL_WRITE_ALLOW_READ) + lockMode = DSM_LK_EXCL; + else + lockMode = DSM_LK_SHARE; + *to++= &lock; return to; } +void 
ha_gemini::update_create_info(HA_CREATE_INFO *create_info) +{ + table->file->info(HA_STATUS_AUTO | HA_STATUS_CONST); + if (!(create_info->used_fields & HA_CREATE_USED_AUTO)) + { + create_info->auto_increment_value=auto_increment_value; + } +} int ha_gemini::create(const char *name, register TABLE *form, HA_CREATE_INFO *create_info) @@ -1777,7 +2012,7 @@ int ha_gemini::create(const char *name, register TABLE *form, (dsmText_t *)"gemini_data_area"); if( rc != 0 ) { - printf("dsmAreaNew failed %ld\n",rc); + gemini_msg(pcontext, "dsmAreaNew failed %l",rc); return(rc); } @@ -1787,7 +2022,7 @@ int ha_gemini::create(const char *name, register TABLE *form, (dsmText_t *)&name_buff[start_of_name]); if( rc != 0 ) { - printf("dsmExtentCreate failed %ld\n",rc); + gemini_msg(pcontext, "dsmExtentCreate failed %l",rc); return(rc); } @@ -1805,6 +2040,20 @@ int ha_gemini::create(const char *name, register TABLE *form, (dsmText_t *)&name_buff[start_of_name], &dummy,&dummy); + if (rc == 0 && table->blob_fields) + { + /* create a storage object record for blob fields */ + rc = dsmObjectCreate(pcontext, areaNumber, &tableNumber, + DSMOBJECT_BLOB,0,0,0, + (dsmText_t *)&name_buff[start_of_name], + &dummy,&dummy); + if( rc != 0 ) + { + gemini_msg(pcontext, "dsmObjectCreate for blob object failed %l",rc); + return(rc); + } + } + if(rc == 0 && form->keys) { fn_format(name_buff, name, "", ha_gemini_idx_ext, 2 | 4); @@ -1814,7 +2063,7 @@ int ha_gemini::create(const char *name, register TABLE *form, (dsmText_t *)"gemini_index_area"); if( rc != 0 ) { - printf("dsmAreaNew failed %ld\n",rc); + gemini_msg(pcontext, "dsmAreaNew failed %l",rc); return(rc); } /* Create an extent */ @@ -1823,7 +2072,7 @@ int ha_gemini::create(const char *name, register TABLE *form, (dsmText_t *)&name_buff[start_of_name]); if( rc != 0 ) { - printf("dsmExtentCreate failed %ld\n",rc); + gemini_msg(pcontext, "dsmExtentCreate failed %l",rc); return(rc); } @@ -1859,10 +2108,11 @@ int ha_gemini::create(const char *name, 
register TABLE *form, } } - rc = dsmTableAutoIncrementSet(pcontext,tableNumber, - create_info->auto_increment_value); - - + /* The auto_increment value is the next one to be given + out so give dsm one less than this value */ + if(create_info->auto_increment_value) + rc = dsmTableAutoIncrementSet(pcontext,tableNumber, + create_info->auto_increment_value-1); /* Get a table lock on this table in case this table is being created as part of an alter table statement. We don't want @@ -1950,26 +2200,25 @@ int ha_gemini::delete_table(const char *pname) (dsmObject_t *)&tableNum); if (rc) { - printf("Cound not find table number for %s with string %s, %ld\n", - pname,name_buff,rc); + gemini_msg(pcontext, "Unable to find table number for %s", name_buff); rc = gemini_rollback(thd); if (rc) { - printf("Error in rollback %ld\n",rc); + gemini_msg(pcontext, "Error in rollback %l",rc); } DBUG_RETURN(rc); } - rc = dsmObjectInfo(pcontext, tableNum, DSMOBJECT_MIXTABLE, &tableArea, - &objectAttr, &associate, &associateType, &block, &root); + rc = dsmObjectInfo(pcontext, tableNum, DSMOBJECT_MIXTABLE, tableNum, + &tableArea, &objectAttr, &associateType, &block, &root); if (rc) { - printf("Failed to get area number for table %d, %s, return %ld\n", + gemini_msg(pcontext, "Failed to get area number for table %d, %s, return %l", tableNum, pname, rc); rc = gemini_rollback(thd); if (rc) { - printf("Error in rollback %ld\n",rc); + gemini_msg(pcontext, "Error in rollback %l",rc); } } @@ -1979,14 +2228,14 @@ int ha_gemini::delete_table(const char *pname) rc = dsmObjectDeleteAssociate(pcontext, tableNum, &indexArea); if (rc) { - printf("Error deleting storage objects for table number %d, return %ld\n", + gemini_msg(pcontext, "Error deleting storage objects for table number %d, return %l", (int)tableNum, rc); /* roll back txn and return */ rc = gemini_rollback(thd); if (rc) { - printf("Error in rollback %ld\n",rc); + gemini_msg(pcontext, "Error in rollback %l",rc); } DBUG_RETURN(rc); } @@ -1994,33 
+2243,33 @@ int ha_gemini::delete_table(const char *pname) if (indexArea != DSMAREA_INVALID) { /* Delete the extents for both Index and Table */ - rc = dsmExtentDelete(pcontext, indexArea, 0); + rc = dsmExtentDelete(pcontext, indexArea); rc = dsmAreaDelete(pcontext, indexArea); if (rc) { - printf("Error deleting Index Area %ld, return %ld\n", indexArea, rc); + gemini_msg(pcontext, "Error deleting Index Area %l, return %l", indexArea, rc); /* roll back txn and return */ rc = gemini_rollback(thd); if (rc) { - printf("Error in rollback %ld\n",rc); + gemini_msg(pcontext, "Error in rollback %l",rc); } DBUG_RETURN(rc); } } - rc = dsmExtentDelete(pcontext, tableArea, 0); + rc = dsmExtentDelete(pcontext, tableArea); rc = dsmAreaDelete(pcontext, tableArea); if (rc) { - printf("Error deleting table Area %ld, name %s, return %ld\n", + gemini_msg(pcontext, "Error deleting table Area %l, name %s, return %l", tableArea, pname, rc); /* roll back txn and return */ rc = gemini_rollback(thd); if (rc) { - printf("Error in rollback %ld\n",rc); + gemini_msg(pcontext, "Error in rollback %l",rc); } DBUG_RETURN(rc); } @@ -2030,7 +2279,7 @@ int ha_gemini::delete_table(const char *pname) rc = gemini_commit(thd); if (rc) { - printf("Failed to commit transaction %ld\n",rc); + gemini_msg(pcontext, "Failed to commit transaction %l",rc); } @@ -2047,7 +2296,6 @@ int ha_gemini::rename_table(const char *pfrom, const char *pto) THD *thd; dsmContext_t *pcontext; dsmStatus_t rc; - char tabname_buff[FN_REFLEN]; char dbname_buff[FN_REFLEN]; char name_buff[FN_REFLEN]; char newname_buff[FN_REFLEN]; @@ -2056,6 +2304,7 @@ int ha_gemini::rename_table(const char *pfrom, const char *pto) unsigned i, nameLen; dsmObject_t tableNum; dsmArea_t indexArea = 0; + dsmArea_t tableArea = 0; DBUG_ENTER("ha_gemini::rename_table"); @@ -2068,7 +2317,7 @@ int ha_gemini::rename_table(const char *pfrom, const char *pto) { if (gemini_is_vst(name_buff)) { - return 0; + return DSM_S_CANT_RENAME_VST; } } } @@ -2113,21 +2362,51 @@ 
int ha_gemini::rename_table(const char *pfrom, const char *pto) rc = dsmObjectNameToNum(pcontext, (dsmText_t *)name_buff, &tableNum); if (rc) + { + gemini_msg(pcontext, "Unable to file Table number for %s", name_buff); goto errorReturn; + } rc = dsmObjectRename(pcontext, tableNum, (dsmText_t *)newname_buff, (dsmText_t *)&newidxextname_buff[start_of_name], (dsmText_t *)&newextname_buff[start_of_name], - &indexArea); + &indexArea, &tableArea); if (rc) + { + gemini_msg(pcontext, "Failed to rename %s to %s",name_buff,newname_buff); goto errorReturn; + } + + /* Rename the physical table and index files (if necessary). + ** Close the file, rename it, and reopen it (have to do it this + ** way so rename works on Windows). + */ + if (!(rc = dsmAreaClose(pcontext, tableArea))) + { + if (!(rc = rename_file_ext(pfrom, pto, ha_gemini_ext))) + { + rc = dsmAreaOpen(pcontext, tableArea, 0); + if (rc) + { + gemini_msg(pcontext, "Failed to reopen area %d",tableArea); + } + } + } - /* rename the physical table and index files (if necessary) */ - rc = rename_file_ext(pfrom, pto, ha_gemini_ext); if (!rc && indexArea) { - rc = rename_file_ext(pfrom, pto, ha_gemini_idx_ext); + if (!(rc = dsmAreaClose(pcontext, indexArea))) + { + if (!(rc = rename_file_ext(pfrom, pto, ha_gemini_idx_ext))) + { + rc = dsmAreaOpen(pcontext, indexArea, 0); + if (rc) + { + gemini_msg(pcontext, "Failed to reopen area %d",tableArea); + } + } + } } errorReturn: @@ -2143,17 +2422,38 @@ errorReturn: double ha_gemini::scan_time() { - return records / (gemini_blocksize / table->reclength); + return (double)records / + (double)((gemini_blocksize / (double)table->reclength)); } -int ha_gemini::check(THD* thd, HA_CHECK_OPT* check_opt) +int ha_gemini::analyze(THD* thd, HA_CHECK_OPT* check_opt) { int error; + uint saveIsolation; + dsmMask_t saveLockMode; + + check_opt->quick = true; + check_opt->optimize = true; // Tells check not to get table lock + saveLockMode = lockMode; + saveIsolation = thd->gemini.tx_isolation; + 
thd->gemini.tx_isolation = ISO_READ_UNCOMMITTED; + lockMode = DSM_LK_NOLOCK; + error = check(thd,check_opt); + lockMode = saveLockMode; + thd->gemini.tx_isolation = saveIsolation; + return (error); +} + +int ha_gemini::check(THD* thd, HA_CHECK_OPT* check_opt) +{ + int error = 0; int checkStatus = HA_ADMIN_OK; ha_rows indexCount; - byte *buf = 0, *indexBuf = 0; + byte *buf = 0, *indexBuf = 0, *prevBuf = 0; int errorCount = 0; + info(HA_STATUS_VARIABLE); // Makes sure row count is up to date + /* Get a shared table lock */ if(thd->gemini.needSavepoint) { @@ -2167,23 +2467,33 @@ int ha_gemini::check(THD* thd, HA_CHECK_OPT* check_opt) return(error); thd->gemini.needSavepoint = 0; } - buf = my_malloc(table->rec_buff_length,MYF(MY_WME)); - indexBuf = my_malloc(table->rec_buff_length,MYF(MY_WME)); + buf = (byte*)my_malloc(table->rec_buff_length,MYF(MY_WME)); + indexBuf = (byte*)my_malloc(table->rec_buff_length,MYF(MY_WME)); + prevBuf = (byte*)my_malloc(table->rec_buff_length,MYF(MY_WME |MY_ZEROFILL )); + /* Lock the table */ - error = dsmObjectLock((dsmContext_t *)thd->gemini.context, - (dsmObject_t)tableNumber, - DSMOBJECT_TABLE,0, - DSM_LK_SHARE, 1, 0); + if (!check_opt->optimize) + error = dsmObjectLock((dsmContext_t *)thd->gemini.context, + (dsmObject_t)tableNumber, + DSMOBJECT_TABLE,0, + DSM_LK_SHARE, 1, 0); if(error) + { + gemini_msg((dsmContext_t *)thd->gemini.context, + "Failed to lock table %d, error %d",tableNumber, error); return error; + } - info(HA_STATUS_VARIABLE); - + ha_rows *rec_per_key = share->rec_per_key; /* If quick option just scan along index converting and counting entries */ for (uint i = 0; i < table->keys; i++) { - key_read = 1; + key_read = 1; // Causes data to be extracted from the keys indexCount = 0; + // Clear the cardinality stats for this index + memset(table->key_info[i].rec_per_key,0, + sizeof(table->key_info[0].rec_per_key[0]) * + table->key_info[i].key_parts); error = index_init(i); error = index_first(indexBuf); while(!error) @@ 
-2195,8 +2505,12 @@ int ha_gemini::check(THD* thd, HA_CHECK_OPT* check_opt) error = fetch_row(thd->gemini.context,buf); if(!error) { - if(key_cmp(i,buf,indexBuf)) + if(key_cmp(i,buf,indexBuf,false)) { + + gemini_msg((dsmContext_t *)thd->gemini.context, + "Check Error! Key does not match row for rowid %d for index %s", + lastRowid,table->key_info[i].name); print_msg(thd,table->real_name,"check","error", "Key does not match row for rowid %d for index %s", lastRowid,table->key_info[i].name); @@ -2209,6 +2523,9 @@ int ha_gemini::check(THD* thd, HA_CHECK_OPT* check_opt) { errorCount++; checkStatus = HA_ADMIN_CORRUPT; + gemini_msg((dsmContext_t *)thd->gemini.context, + "Check Error! Key does not have a valid row pointer %d for index %s", + lastRowid,table->key_info[i].name); print_msg(thd,table->real_name,"check","error", "Key does not have a valid row pointer %d for index %s", lastRowid,table->key_info[i].name); @@ -2218,10 +2535,27 @@ int ha_gemini::check(THD* thd, HA_CHECK_OPT* check_opt) } } } + + key_cmp(i,indexBuf,prevBuf,true); + bcopy((void *)indexBuf,(void *)prevBuf,table->rec_buff_length); + if(!error) error = index_next(indexBuf); } - + + for(uint j=1; j < table->key_info[i].key_parts; j++) + { + table->key_info[i].rec_per_key[j] += table->key_info[i].rec_per_key[j-1]; + } + for(uint k=0; k < table->key_info[i].key_parts; k++) + { + if (table->key_info[i].rec_per_key[k]) + table->key_info[i].rec_per_key[k] = + records / table->key_info[i].rec_per_key[k]; + *rec_per_key = table->key_info[i].rec_per_key[k]; + rec_per_key++; + } + if(error == HA_ERR_END_OF_FILE) { /* Check count of rows */ @@ -2231,6 +2565,10 @@ int ha_gemini::check(THD* thd, HA_CHECK_OPT* check_opt) /* Number of index entries does not agree with the number of rows in the index. */ checkStatus = HA_ADMIN_CORRUPT; + gemini_msg((dsmContext_t *)thd->gemini.context, + "Check Error! 
Total rows %d does not match total index entries %d for %s", + records, indexCount, + table->key_info[i].name); print_msg(thd,table->real_name,"check","error", "Total rows %d does not match total index entries %d for %s", records, indexCount, @@ -2248,23 +2586,61 @@ int ha_gemini::check(THD* thd, HA_CHECK_OPT* check_opt) { /* Now scan the table and for each row generate the keys and find them in the index */ - error = fullCheck(thd, buf);\ + error = fullCheck(thd, buf); if(error) checkStatus = error; } + // Store the key distribution information + error = saveKeyStats(thd); error_return: - my_free(buf,MYF(MY_ALLOW_ZERO_PTR)); + my_free((char*)buf,MYF(MY_ALLOW_ZERO_PTR)); + my_free((char*)indexBuf,MYF(MY_ALLOW_ZERO_PTR)); + my_free((char*)prevBuf,MYF(MY_ALLOW_ZERO_PTR)); + index_end(); key_read = 0; - error = dsmObjectUnlock((dsmContext_t *)thd->gemini.context, - (dsmObject_t)tableNumber, - DSMOBJECT_TABLE,0, - DSM_LK_SHARE,0); + if(!check_opt->optimize) + { + error = dsmObjectUnlock((dsmContext_t *)thd->gemini.context, + (dsmObject_t)tableNumber, + DSMOBJECT_TABLE,0, + DSM_LK_SHARE,0); + if (error) + { + gemini_msg((dsmContext_t *)thd->gemini.context, + "Unable to unlock table %d", tableNumber); + } + } return checkStatus; } +int ha_gemini::saveKeyStats(THD *thd) +{ + dsmStatus_t rc = 0; + + /* Insert a row in the indexStats table for each column of + each index of the table */ + + for(uint i = 0; i < table->keys; i++) + { + for (uint j = 0; j < table->key_info[i].key_parts && !rc ;j++) + { + rc = dsmIndexStatsPut((dsmContext_t *)thd->gemini.context, + tableNumber, pindexNumbers[i], + j, (LONG64)table->key_info[i].rec_per_key[j]); + if (rc) + { + gemini_msg((dsmContext_t *)thd->gemini.context, + "Failed to update index stats for table %d, index %d", + tableNumber, pindexNumbers[i]); + } + } + } + return rc; +} + int ha_gemini::fullCheck(THD *thd,byte *buf) { int error; @@ -2319,7 +2695,12 @@ int ha_gemini::repair(THD* thd, HA_CHECK_OPT* check_opt) 
&thd->gemini.savepoint, DSMTXN_SAVE, 0, 0); if (error) + { + gemini_msg((dsmContext_t *)thd->gemini.context, + "Error setting savepoint number %d, error %d", + thd->gemini.savepoint++, error); return(error); + } thd->gemini.needSavepoint = 0; } @@ -2330,7 +2711,11 @@ int ha_gemini::repair(THD* thd, HA_CHECK_OPT* check_opt) DSMOBJECT_TABLE,0, DSM_LK_EXCL, 1, 0); if(error) + { + gemini_msg((dsmContext_t *)thd->gemini.context, + "Failed to lock table %d, error %d",tableNumber, error); return error; + } error = dsmContextSetLong((dsmContext_t *)thd->gemini.context, DSM_TAGCONTEXT_NO_LOGGING,1); @@ -2338,13 +2723,18 @@ int ha_gemini::repair(THD* thd, HA_CHECK_OPT* check_opt) error = dsmTableReset((dsmContext_t *)thd->gemini.context, (dsmTable_t)tableNumber, table->keys, pindexNumbers); + if (error) + { + gemini_msg((dsmContext_t *)thd->gemini.context, + "dsmTableReset failed for table %d, error %d",tableNumber, error); + } - buf = my_malloc(table->rec_buff_length,MYF(MY_WME)); + buf = (byte*)my_malloc(table->rec_buff_length,MYF(MY_WME)); dsmRecord.table = tableNumber; dsmRecord.recid = 0; dsmRecord.pbuffer = (dsmBuffer_t *)rec_buff; dsmRecord.recLength = table->reclength; - dsmRecord.maxLength = table->reclength; + dsmRecord.maxLength = table->rec_buff_length; while(!error) { error = dsmTableScan((dsmContext_t *)thd->gemini.context, @@ -2352,13 +2742,15 @@ int ha_gemini::repair(THD* thd, HA_CHECK_OPT* check_opt) 1); if(!error) { - unpack_row((char *)buf,(char *)dsmRecord.pbuffer); - error = handleIndexEntries(buf,dsmRecord.recid,KEY_CREATE); - if(error == HA_ERR_FOUND_DUPP_KEY) + if (!(error = unpack_row((char *)buf,(char *)dsmRecord.pbuffer))) { - /* We don't want to stop on duplicate keys -- we're repairing - here so let's get as much repaired as possible. 
*/ - error = 0; + error = handleIndexEntries(buf,dsmRecord.recid,KEY_CREATE); + if(error == HA_ERR_FOUND_DUPP_KEY) + { + /* We don't want to stop on duplicate keys -- we're repairing + here so let's get as much repaired as possible. */ + error = 0; + } } } } @@ -2366,7 +2758,13 @@ int ha_gemini::repair(THD* thd, HA_CHECK_OPT* check_opt) (dsmObject_t)tableNumber, DSMOBJECT_TABLE,0, DSM_LK_EXCL,0); - my_free(buf,MYF(MY_ALLOW_ZERO_PTR)); + if (error) + { + gemini_msg((dsmContext_t *)thd->gemini.context, + "Unable to unlock table %d", tableNumber); + } + + my_free((char*)buf,MYF(MY_ALLOW_ZERO_PTR)); error = dsmContextSetLong((dsmContext_t *)thd->gemini.context, DSM_TAGCONTEXT_NO_LOGGING,0); @@ -2374,6 +2772,313 @@ int ha_gemini::repair(THD* thd, HA_CHECK_OPT* check_opt) return error; } + +int ha_gemini::restore(THD* thd, HA_CHECK_OPT *check_opt) +{ + dsmContext_t *pcontext = (dsmContext_t *)thd->gemini.context; + char* backup_dir = thd->lex.backup_dir; + char src_path[FN_REFLEN], dst_path[FN_REFLEN]; + char* table_name = table->real_name; + int error = 0; + int errornum; + const char* errmsg = ""; + dsmArea_t tableArea = 0; + dsmObjectAttr_t objectAttr; + dsmObject_t associate; + dsmObjectType_t associateType; + dsmDbkey_t block, root; + dsmStatus_t rc; + + rc = dsmObjectInfo(pcontext, tableNumber, DSMOBJECT_MIXTABLE, tableNumber, + &tableArea, &objectAttr, &associateType, &block, &root); + if (rc) + { + error = HA_ADMIN_FAILED; + errmsg = "Failed in dsmObjectInfo (.gmd) (Error %d)"; + errornum = rc; + gemini_msg(pcontext, errmsg ,errornum); + goto err; + } + + rc = dsmAreaFlush(pcontext, tableArea, FLUSH_BUFFERS | FLUSH_SYNC); + if (rc) + { + error = HA_ADMIN_FAILED; + errmsg = "Failed in dsmAreaFlush (.gmd) (Error %d)"; + errornum = rc; + gemini_msg(pcontext, errmsg ,errornum); + goto err; + } + + rc = dsmAreaClose(pcontext, tableArea); + if (rc) + { + error = HA_ADMIN_FAILED; + errmsg = "Failed in dsmAreaClose (.gmd) (Error %d)"; + errornum = rc; + 
gemini_msg(pcontext, errmsg ,errornum); + goto err; + } + + /* Restore the data file */ + if (!fn_format(src_path, table_name, backup_dir, ha_gemini_ext, 4 + 64)) + { + return HA_ADMIN_INVALID; + } + + if (my_copy(src_path, fn_format(dst_path, table->path, "", + ha_gemini_ext, 4), MYF(MY_WME))) + { + error = HA_ADMIN_FAILED; + errmsg = "Failed in my_copy (.gmd) (Error %d)"; + errornum = errno; + gemini_msg(pcontext, errmsg ,errornum); + goto err; + } + + rc = dsmAreaFlush(pcontext, tableArea, FREE_BUFFERS); + if (rc) + { + error = HA_ADMIN_FAILED; + errmsg = "Failed in dsmAreaFlush (.gmd) (Error %d)"; + errornum = rc; + gemini_msg(pcontext, errmsg ,errornum); + goto err; + } + + rc = dsmAreaOpen(pcontext, tableArea, 1); + if (rc) + { + error = HA_ADMIN_FAILED; + errmsg = "Failed in dsmAreaOpen (.gmd) (Error %d)"; + errornum = rc; + gemini_msg(pcontext, errmsg ,errornum); + goto err; + } + +#ifdef GEMINI_BACKUP_IDX + dsmArea_t indexArea = 0; + + rc = dsmObjectInfo(pcontext, tableNumber, DSMOBJECT_MIXINDEX, &indexArea, + &objectAttr, &associate, &associateType, &block, &root); + if (rc) + { + error = HA_ADMIN_FAILED; + errmsg = "Failed in dsmObjectInfo (.gmi) (Error %d)"; + errornum = rc; + gemini_msg(pcontext, errmsg ,errornum); + goto err; + } + + rc = dsmAreaClose(pcontext, indexArea); + if (rc) + { + error = HA_ADMIN_FAILED; + errmsg = "Failed in dsmAreaClose (.gmi) (Error %d)"; + errornum = rc; + gemini_msg(pcontext, errmsg ,errornum); + goto err; + } + + /* Restore the index file */ + if (!fn_format(src_path, table_name, backup_dir, ha_gemini_idx_ext, 4 + 64)) + { + return HA_ADMIN_INVALID; + } + + if (my_copy(src_path, fn_format(dst_path, table->path, "", + ha_gemini_idx_ext, 4), MYF(MY_WME))) + { + error = HA_ADMIN_FAILED; + errmsg = "Failed in my_copy (.gmi) (Error %d)"; + errornum = errno; + gemini_msg(pcontext, errmsg ,errornum); + goto err; + } + + rc = dsmAreaOpen(pcontext, indexArea, 1); + if (rc) + { + error = HA_ADMIN_FAILED; + errmsg = "Failed in 
dsmAreaOpen (.gmi) (Error %d)"; + errornum = rc; + gemini_msg(pcontext, errmsg ,errornum); + goto err; + } + + return HA_ADMIN_OK; +#else /* #ifdef GEMINI_BACKUP_IDX */ + HA_CHECK_OPT tmp_check_opt; + tmp_check_opt.init(); + /* The following aren't currently implemented in ha_gemini::repair + ** tmp_check_opt.quick = 1; + ** tmp_check_opt.flags |= T_VERY_SILENT; + */ + return (repair(thd, &tmp_check_opt)); +#endif /* #ifdef GEMINI_BACKUP_IDX */ + + err: + { +#if 0 + /* mi_check_print_error is in ha_myisam.cc, so none of the informative + ** error messages above is currently being printed + */ + MI_CHECK param; + myisamchk_init(¶m); + param.thd = thd; + param.op_name = (char*)"restore"; + param.table_name = table->table_name; + param.testflag = 0; + mi_check_print_error(¶m,errmsg, errornum); +#endif + return error; + } +} + + +int ha_gemini::backup(THD* thd, HA_CHECK_OPT *check_opt) +{ + dsmContext_t *pcontext = (dsmContext_t *)thd->gemini.context; + char* backup_dir = thd->lex.backup_dir; + char src_path[FN_REFLEN], dst_path[FN_REFLEN]; + char* table_name = table->real_name; + int error = 0; + int errornum; + const char* errmsg = ""; + dsmArea_t tableArea = 0; + dsmObjectAttr_t objectAttr; + dsmObject_t associate; + dsmObjectType_t associateType; + dsmDbkey_t block, root; + dsmStatus_t rc; + + rc = dsmObjectInfo(pcontext, tableNumber, DSMOBJECT_MIXTABLE, tableNumber, + &tableArea, &objectAttr, &associateType, &block, &root); + if (rc) + { + error = HA_ADMIN_FAILED; + errmsg = "Failed in dsmObjectInfo (.gmd) (Error %d)"; + errornum = rc; + goto err; + } + + /* Flush the buffers before backing up the table */ + dsmAreaFlush((dsmContext_t *)thd->gemini.context, tableArea, + FLUSH_BUFFERS | FLUSH_SYNC); + if (rc) + { + error = HA_ADMIN_FAILED; + errmsg = "Failed in dsmAreaFlush (.gmd) (Error %d)"; + errornum = rc; + gemini_msg(pcontext, errmsg ,errornum); + goto err; + } + + /* Backup the .FRM file */ + if (!fn_format(dst_path, table_name, backup_dir, reg_ext, 4 + 64)) 
+ { + errmsg = "Failed in fn_format() for .frm file: errno = %d"; + error = HA_ADMIN_INVALID; + errornum = errno; + gemini_msg(pcontext, errmsg ,errornum); + goto err; + } + + if (my_copy(fn_format(src_path, table->path,"", reg_ext, 4), + dst_path, + MYF(MY_WME | MY_HOLD_ORIGINAL_MODES ))) + { + error = HA_ADMIN_FAILED; + errmsg = "Failed copying .frm file: errno = %d"; + errornum = errno; + gemini_msg(pcontext, errmsg ,errornum); + goto err; + } + + /* Backup the data file */ + if (!fn_format(dst_path, table_name, backup_dir, ha_gemini_ext, 4 + 64)) + { + errmsg = "Failed in fn_format() for .GMD file: errno = %d"; + error = HA_ADMIN_INVALID; + errornum = errno; + gemini_msg(pcontext, errmsg ,errornum); + goto err; + } + + if (my_copy(fn_format(src_path, table->path,"", ha_gemini_ext, 4), + dst_path, + MYF(MY_WME | MY_HOLD_ORIGINAL_MODES )) ) + { + errmsg = "Failed copying .GMD file: errno = %d"; + error= HA_ADMIN_FAILED; + errornum = errno; + gemini_msg(pcontext, errmsg ,errornum); + goto err; + } + +#ifdef GEMINI_BACKUP_IDX + dsmArea_t indexArea = 0; + + rc = dsmObjectInfo(pcontext, tableNumber, DSMOBJECT_MIXINDEX, &indexArea, + &objectAttr, &associate, &associateType, &block, &root); + if (rc) + { + error = HA_ADMIN_FAILED; + errmsg = "Failed in dsmObjectInfo (.gmi) (Error %d)"; + errornum = rc; + gemini_msg(pcontext, errmsg ,errornum); + goto err; + } + + /* Backup the index file */ + if (!fn_format(dst_path, table_name, backup_dir, ha_gemini_idx_ext, 4 + 64)) + { + errmsg = "Failed in fn_format() for .GMI file: errno = %d"; + error = HA_ADMIN_INVALID; + errornum = errno; + gemini_msg(pcontext, errmsg ,errornum); + goto err; + } + + if (my_copy(fn_format(src_path, table->path,"", ha_gemini_idx_ext, 4), + dst_path, + MYF(MY_WME | MY_HOLD_ORIGINAL_MODES )) ) + { + errmsg = "Failed copying .GMI file: errno = %d"; + error= HA_ADMIN_FAILED; + errornum = errno; + gemini_msg(pcontext, errmsg ,errornum); + goto err; + } +#endif /* #ifdef GEMINI_BACKUP_IDX */ + + return 
HA_ADMIN_OK; + + err: + { +#if 0 + /* mi_check_print_error is in ha_myisam.cc, so none of the informative + ** error messages above is currently being printed + */ + MI_CHECK param; + myisamchk_init(¶m); + param.thd = thd; + param.op_name = (char*)"backup"; + param.table_name = table->table_name; + param.testflag = 0; + mi_check_print_error(¶m,errmsg, errornum); +#endif + return error; + } +} + + +int ha_gemini::optimize(THD* thd, HA_CHECK_OPT *check_opt) +{ + return HA_ADMIN_ALREADY_DONE; +} + + ha_rows ha_gemini::records_in_range(int keynr, const byte *start_key,uint start_key_len, enum ha_rkey_function start_search_flag, @@ -2412,7 +3117,7 @@ ha_rows ha_gemini::records_in_range(int keynr, pbracketBase->keyLen = componentLen; } - pbracketBase->keyLen -= 3; + pbracketBase->keyLen -= FULLKEYHDRSZ; if(end_key) { @@ -2431,9 +3136,10 @@ ha_rows ha_gemini::records_in_range(int keynr, pbracketLimit->keyLen = componentLen; } - pbracketLimit->keyLen -= 3; + pbracketLimit->keyLen -= FULLKEYHDRSZ; error = dsmIndexRowsInRange((dsmContext_t *)current_thd->gemini.context, pbracketBase,pbracketLimit, + tableNumber, &pctInrange); if(pctInrange >= 1) rows = (ha_rows)pctInrange; @@ -2457,32 +3163,82 @@ ha_rows ha_gemini::records_in_range(int keynr, may only happen in rows with blobs, as the default row length is pre-allocated. 
*/ -int ha_gemini::pack_row(byte **pprow, int *ppackedLength, const byte *record) +int ha_gemini::pack_row(byte **pprow, int *ppackedLength, const byte *record, + bool update) { + THD *thd = current_thd; + dsmContext_t *pcontext = (dsmContext_t *)thd->gemini.context; + gemBlobDesc_t *pBlobDesc = pBlobDescs; + if (fixed_length_row) { *pprow = (byte *)record; *ppackedLength=(int)table->reclength; return 0; } - if (table->blob_fields) - { - return HA_ERR_WRONG_COMMAND; - } /* Copy null bits */ memcpy(rec_buff, record, table->null_bytes); byte *ptr=rec_buff + table->null_bytes; for (Field **field=table->field ; *field ; field++) - ptr=(byte*) (*field)->pack((char*) ptr,record + (*field)->offset()); + { +#ifdef GEMINI_TINYBLOB_IN_ROW + /* Tiny blobs (255 bytes or less) are stored in the row; larger + ** blobs are stored in a separate storage object (see ha_gemini::create). + */ + if ((*field)->type() == FIELD_TYPE_BLOB && + ((Field_blob*)*field)->blobtype() != FIELD_TYPE_TINY_BLOB) +#else + if ((*field)->type() == FIELD_TYPE_BLOB) +#endif + { + dsmBlob_t gemBlob; + char *blobptr; + + gemBlob.areaType = DSMOBJECT_BLOB; + gemBlob.blobObjNo = tableNumber; + gemBlob.blobId = 0; + gemBlob.totLength = gemBlob.segLength = + ((Field_blob*)*field)->get_length((char*)record + (*field)->offset()); + ((Field_blob*)*field)->get_ptr((char**) &blobptr); + gemBlob.pBuffer = (dsmBuffer_t *)blobptr; + gemBlob.blobContext.blobOffset = 0; + if (gemBlob.totLength) + { + dsmBlobStart(pcontext, &gemBlob); + if (update && pBlobDesc->blobId) + { + gemBlob.blobId = pBlobDesc->blobId; + dsmBlobUpdate(pcontext, &gemBlob, NULL); + } + else + { + dsmBlobPut(pcontext, &gemBlob, NULL); + } + dsmBlobEnd(pcontext, &gemBlob); + } + ptr = (byte*)((Field_blob*)*field)->pack_id((char*) ptr, + (char*)record + (*field)->offset(), (longlong)gemBlob.blobId); + + pBlobDesc++; + } + else + { + ptr=(byte*) (*field)->pack((char*) ptr, (char*)record + (*field)->offset()); + } + } *pprow=rec_buff; *ppackedLength= 
(ptr - rec_buff); return 0; } -void ha_gemini::unpack_row(char *record, char *prow) +int ha_gemini::unpack_row(char *record, char *prow) { + THD *thd = current_thd; + dsmContext_t *pcontext = (dsmContext_t *)thd->gemini.context; + gemBlobDesc_t *pBlobDesc = pBlobDescs; + if (fixed_length_row) { /* If the table is a VST, the row is in Gemini internal format. @@ -2568,38 +3324,129 @@ void ha_gemini::unpack_row(char *record, char *prow) const char *ptr= (const char*) prow; memcpy(record, ptr, table->null_bytes); ptr+=table->null_bytes; + for (Field **field=table->field ; *field ; field++) - ptr= (*field)->unpack(record + (*field)->offset(), ptr); + { +#ifdef GEMINI_TINYBLOB_IN_ROW + /* Tiny blobs (255 bytes or less) are stored in the row; larger + ** blobs are stored in a separate storage object (see ha_gemini::create). + */ + if ((*field)->type() == FIELD_TYPE_BLOB && + ((Field_blob*)*field)->blobtype() != FIELD_TYPE_TINY_BLOB) +#else + if ((*field)->type() == FIELD_TYPE_BLOB) +#endif + { + dsmBlob_t gemBlob; + + gemBlob.areaType = DSMOBJECT_BLOB; + gemBlob.blobObjNo = tableNumber; + gemBlob.blobId = (dsmBlobId_t)(((Field_blob*)*field)->get_id(ptr)); + if (gemBlob.blobId) + { + gemBlob.totLength = + gemBlob.segLength = ((Field_blob*)*field)->get_length(ptr); + /* Allocate memory to store the blob. This memory is freed + ** the next time unpack_row is called for this table. 
+ */ + gemBlob.pBuffer = (dsmBuffer_t *)my_malloc(gemBlob.totLength, + MYF(0)); + if (!gemBlob.pBuffer) + { + return HA_ERR_OUT_OF_MEM; + } + gemBlob.blobContext.blobOffset = 0; + dsmBlobStart(pcontext, &gemBlob); + dsmBlobGet(pcontext, &gemBlob, NULL); + dsmBlobEnd(pcontext, &gemBlob); + } + else + { + gemBlob.pBuffer = 0; + } + ptr = ((Field_blob*)*field)->unpack_id(record + (*field)->offset(), + ptr, (char *)gemBlob.pBuffer); + pBlobDesc->blobId = gemBlob.blobId; + my_free((char*)pBlobDesc->pBlob, MYF(MY_ALLOW_ZERO_PTR)); + pBlobDesc->pBlob = gemBlob.pBuffer; + pBlobDesc++; + } + else + { + ptr= (*field)->unpack(record + (*field)->offset(), ptr); + } + } } + + return 0; } int ha_gemini::key_cmp(uint keynr, const byte * old_row, - const byte * new_row) + const byte * new_row, bool updateStats) { KEY_PART_INFO *key_part=table->key_info[keynr].key_part; KEY_PART_INFO *end=key_part+table->key_info[keynr].key_parts; - for ( ; key_part != end ; key_part++) + for ( uint i = 0 ; key_part != end ; key_part++, i++) { if (key_part->null_bit) { if ((old_row[key_part->null_offset] & key_part->null_bit) != (new_row[key_part->null_offset] & key_part->null_bit)) + { + if(updateStats) + table->key_info[keynr].rec_per_key[i]++; return 1; + } + else if((old_row[key_part->null_offset] & key_part->null_bit) && + (new_row[key_part->null_offset] & key_part->null_bit)) + /* Both are null */ + continue; } if (key_part->key_part_flag & (HA_BLOB_PART | HA_VAR_LENGTH)) { - - if (key_part->field->cmp_binary(old_row + key_part->offset, - new_row + key_part->offset, + if (key_part->field->cmp_binary((char*)(old_row + key_part->offset), + (char*)(new_row + key_part->offset), (ulong) key_part->length)) + { + if(updateStats) + table->key_info[keynr].rec_per_key[i]++; return 1; + } } else { if (memcmp(old_row+key_part->offset, new_row+key_part->offset, key_part->length)) + { + /* Check for special case of -0 which causes table check + to find an invalid key when comparing the the index + value of 
0 to the -0 stored in the row */ + if(key_part->field->type() == FIELD_TYPE_DECIMAL) + { + double fieldValue; + char *ptr = key_part->field->ptr; + + key_part->field->ptr = (char *)old_row + key_part->offset; + fieldValue = key_part->field->val_real(); + if(fieldValue == 0) + { + key_part->field->ptr = (char *)new_row + key_part->offset; + fieldValue = key_part->field->val_real(); + if(fieldValue == 0) + { + key_part->field->ptr = ptr; + continue; + } + } + key_part->field->ptr = ptr; + } + if(updateStats) + { + table->key_info[keynr].rec_per_key[i]++; + } return 1; + } } } return 0; @@ -2612,13 +3459,13 @@ int gemini_parse_table_name(const char *fullname, char *dbname, char *tabname) /* separate out the name of the table and the database */ - namestart = strchr(fullname + start_of_name, '/'); + namestart = (char *)strchr(fullname + start_of_name, '/'); if (!namestart) { /* if on Windows, slashes go the other way */ - namestart = strchr(fullname + start_of_name, '\\'); + namestart = (char *)strchr(fullname + start_of_name, '\\'); } - nameend = strchr(fullname + start_of_name, '.'); + nameend = (char *)strchr(fullname + start_of_name, '.'); /* sometimes fullname has an extension, sometimes it doesn't */ if (!nameend) { @@ -2680,4 +3527,105 @@ static void print_msg(THD *thd, const char *table_name, const char *op_name, thd->killed=1; } +/* Load shared area with rows per key statistics */ +void +ha_gemini::get_index_stats(THD *thd) +{ + dsmStatus_t rc = 0; + ha_rows *rec_per_key = share->rec_per_key; + + for(uint i = 0; i < table->keys && !rc; i++) + { + for (uint j = 0; j < table->key_info[i].key_parts && !rc;j++) + { + LONG64 rows_per_key; + rc = dsmIndexStatsGet((dsmContext_t *)thd->gemini.context, + tableNumber, pindexNumbers[i],(int)j, + &rows_per_key); + if (rc) + { + gemini_msg((dsmContext_t *)thd->gemini.context, + "Index Statistics faild for table %d index %d, error %d", + tableNumber, pindexNumbers[i], rc); + } + *rec_per_key = (ha_rows)rows_per_key; + 
rec_per_key++; + } + } + return; +} + +/**************************************************************************** + Handling the shared GEM_SHARE structure that is needed to provide + a global in memory storage location of the rec_per_key stats used + by the optimizer. +****************************************************************************/ + +static byte* gem_get_key(GEM_SHARE *share,uint *length, + my_bool not_used __attribute__((unused))) +{ + *length=share->table_name_length; + return (byte*) share->table_name; +} + +static GEM_SHARE *get_share(const char *table_name, TABLE *table) +{ + GEM_SHARE *share; + + pthread_mutex_lock(&gem_mutex); + uint length=(uint) strlen(table_name); + if (!(share=(GEM_SHARE*) hash_search(&gem_open_tables, (byte*) table_name, + length))) + { + ha_rows *rec_per_key; + char *tmp_name; + + if ((share=(GEM_SHARE *) + my_multi_malloc(MYF(MY_WME | MY_ZEROFILL), + &share, sizeof(*share), + &rec_per_key, table->key_parts * sizeof(ha_rows), + &tmp_name, length+1, + NullS))) + { + share->rec_per_key = rec_per_key; + share->table_name = tmp_name; + share->table_name_length=length; + strcpy(share->table_name,table_name); + if (hash_insert(&gem_open_tables, (byte*) share)) + { + pthread_mutex_unlock(&gem_mutex); + my_free((gptr) share,0); + return 0; + } + thr_lock_init(&share->lock); + pthread_mutex_init(&share->mutex,NULL); + } + } + pthread_mutex_unlock(&gem_mutex); + return share; +} + +static int free_share(GEM_SHARE *share, bool mutex_is_locked) +{ + pthread_mutex_lock(&gem_mutex); + if (mutex_is_locked) + pthread_mutex_unlock(&share->mutex); + if (!--share->use_count) + { + hash_delete(&gem_open_tables, (byte*) share); + thr_lock_delete(&share->lock); + pthread_mutex_destroy(&share->mutex); + my_free((gptr) share, MYF(0)); + } + pthread_mutex_unlock(&gem_mutex); + return 0; +} + +static void gemini_lock_table_overflow_error(dsmContext_t *pcontext) +{ + gemini_msg(pcontext, "The total number of locks exceeds the lock table 
size"); + gemini_msg(pcontext, "Either increase gemini_lock_table_size or use a"); + gemini_msg(pcontext, "different transaction isolation level"); +} + #endif /* HAVE_GEMINI_DB */ diff --git a/sql/ha_gemini.h b/sql/ha_gemini.h index 7e6e8f26588..495dc2fd1c9 100644 --- a/sql/ha_gemini.h +++ b/sql/ha_gemini.h @@ -19,17 +19,26 @@ #pragma interface /* gcc class implementation */ #endif +#include "gem_global.h" #include "dstd.h" #include "dsmpub.h" /* class for the the gemini handler */ enum enum_key_string_options{KEY_CREATE,KEY_DELETE,KEY_CHECK}; - -#define READ_UNCOMMITED 0 -#define READ_COMMITED 1 -#define REPEATABLE_READ 2 -#define SERIALIZEABLE 3 +typedef struct st_gemini_share { + ha_rows *rec_per_key; + THR_LOCK lock; + pthread_mutex_t mutex; + char *table_name; + uint table_name_length,use_count; +} GEM_SHARE; + +typedef struct gemBlobDesc +{ + dsmBlobId_t blobId; + dsmBuffer_t *pBlob; +} gemBlobDesc_t; class ha_gemini: public handler { @@ -38,7 +47,7 @@ class ha_gemini: public handler uint int_option_flag; int tableNumber; dsmIndex_t *pindexNumbers; // dsm object numbers for the indexes on this table - unsigned long lastRowid; + dsmRecid_t lastRowid; uint last_dup_key; bool fixed_length_row, key_read, using_ignore; byte *rec_buff; @@ -46,10 +55,12 @@ class ha_gemini: public handler dsmKey_t *pbracketLimit; dsmKey_t *pfoundKey; dsmMask_t tableStatus; // Crashed/repair status + gemBlobDesc_t *pBlobDescs; int index_open(char *tableName); - int pack_row(byte **prow, int *ppackedLength, const byte *record); - void unpack_row(char *record, char *prow); + int pack_row(byte **prow, int *ppackedLength, const byte *record, + bool update); + int unpack_row(char *record, char *prow); int findRow(THD *thd, dsmMask_t findMode, byte *buf); int fetch_row(void *gemini_context, const byte *buf); int handleIndexEntries(const byte * record, dsmRecid_t recid, @@ -70,24 +81,28 @@ class ha_gemini: public handler void unpack_key(char *record, dsmKey_t *key, uint index); int 
key_cmp(uint keynr, const byte * old_row, - const byte * new_row); + const byte * new_row, bool updateStats); + int saveKeyStats(THD *thd); + void get_index_stats(THD *thd); short cursorId; /* cursorId of active index cursor if any */ dsmMask_t lockMode; /* Shared or exclusive */ /* FIXFIX Don't know why we need this because I don't know what store_lock method does but we core dump without this */ - THR_LOCK alock; THR_LOCK_DATA lock; + GEM_SHARE *share; + public: ha_gemini(TABLE *table): handler(table), file(0), int_option_flag(HA_READ_NEXT | HA_READ_PREV | HA_REC_NOT_IN_SEQ | HA_KEYPOS_TO_RNDPOS | HA_READ_ORDER | HA_LASTKEY_ORDER | HA_LONGLONG_KEYS | HA_NULL_KEY | HA_HAVE_KEY_READ_ONLY | - HA_NO_BLOBS | HA_NO_TEMP_TABLES | - /* HA_BLOB_KEY | */ /*HA_NOT_EXACT_COUNT | */ + HA_BLOB_KEY | + HA_NO_TEMP_TABLES | HA_NO_FULLTEXT_KEY | + /*HA_NOT_EXACT_COUNT | */ /*HA_KEY_READ_WRONG_STR |*/ HA_DROP_BEFORE_CREATE), pbracketBase(0),pbracketLimit(0),pfoundKey(0), cursorId(0) @@ -100,7 +115,7 @@ class ha_gemini: public handler uint max_record_length() const { return MAXRECSZ; } uint max_keys() const { return MAX_KEY-1; } uint max_key_parts() const { return MAX_REF_PARTS; } - uint max_key_length() const { return MAXKEYSZ; } + uint max_key_length() const { return MAXKEYSZ / 2; } bool fast_key_read() { return 1;} bool has_transactions() { return 1;} @@ -129,8 +144,12 @@ class ha_gemini: public handler void info(uint); int extra(enum ha_extra_function operation); int reset(void); + int analyze(THD* thd, HA_CHECK_OPT* check_opt); int check(THD* thd, HA_CHECK_OPT* check_opt); int repair(THD* thd, HA_CHECK_OPT* check_opt); + int restore(THD* thd, HA_CHECK_OPT* check_opt); + int backup(THD* thd, HA_CHECK_OPT* check_opt); + int optimize(THD* thd, HA_CHECK_OPT* check_opt); int external_lock(THD *thd, int lock_type); virtual longlong get_auto_increment(); void position(byte *record); @@ -139,7 +158,7 @@ class ha_gemini: public handler enum ha_rkey_function start_search_flag, const byte 
*end_key,uint end_key_len, enum ha_rkey_function end_search_flag); - + void update_create_info(HA_CREATE_INFO *create_info); int create(const char *name, register TABLE *form, HA_CREATE_INFO *create_info); int delete_table(const char *name); @@ -167,6 +186,7 @@ extern long gemini_locktablesize; extern long gemini_lock_wait_timeout; extern long gemini_spin_retries; extern long gemini_connection_limit; +extern char *gemini_basedir; extern TYPELIB gemini_recovery_typelib; extern ulong gemini_recovery_options; @@ -175,12 +195,13 @@ bool gemini_end(void); bool gemini_flush_logs(void); int gemini_commit(THD *thd); int gemini_rollback(THD *thd); +int gemini_recovery_logging(THD *thd, bool on); void gemini_disconnect(THD *thd); int gemini_rollback_to_savepoint(THD *thd); int gemini_parse_table_name(const char *fullname, char *dbname, char *tabname); int gemini_is_vst(const char *pname); int gemini_set_option_long(int optid, long optval); -const int gemini_blocksize = 8192; -const int gemini_recbits = 7; +const int gemini_blocksize = BLKSIZE; +const int gemini_recbits = DEFAULT_RECBITS; diff --git a/sql/handler.cc b/sql/handler.cc index 212fcea11ae..7720e9ca671 100644 --- a/sql/handler.cc +++ b/sql/handler.cc @@ -694,6 +694,15 @@ void handler::print_error(int error, myf errflag) case HA_ERR_RECORD_FILE_FULL: textno=ER_RECORD_FILE_FULL; break; + case HA_ERR_LOCK_WAIT_TIMEOUT: + textno=ER_LOCK_WAIT_TIMEOUT; + break; + case HA_ERR_LOCK_TABLE_FULL: + textno=ER_LOCK_TABLE_FULL; + break; + case HA_ERR_READ_ONLY_TRANSACTION: + textno=ER_READ_ONLY_TRANSACTION; + break; default: { my_error(ER_GET_ERRNO,errflag,error); @@ -757,6 +766,25 @@ int ha_commit_rename(THD *thd) return error; } +/* Tell the handler to turn on or off logging to the handler's + recovery log +*/ +int ha_recovery_logging(THD *thd, bool on) +{ + int error=0; + + DBUG_ENTER("ha_recovery_logging"); +#ifdef USING_TRANSACTIONS + if (opt_using_transactions) + { +#ifdef HAVE_GEMINI_DB + error = 
gemini_recovery_logging(thd, on); + } +#endif +#endif + DBUG_RETURN(error); +} + int handler::index_next_same(byte *buf, const byte *key, uint keylen) { int error; diff --git a/sql/handler.h b/sql/handler.h index 076bf783f80..7a28dc07a81 100644 --- a/sql/handler.h +++ b/sql/handler.h @@ -74,6 +74,7 @@ #define HA_NOT_DELETE_WITH_CACHE (HA_NOT_READ_AFTER_KEY*2) #define HA_NO_TEMP_TABLES (HA_NOT_DELETE_WITH_CACHE*2) #define HA_NO_PREFIX_CHAR_KEYS (HA_NO_TEMP_TABLES*2) +#define HA_NO_FULLTEXT_KEY (HA_NO_PREFIX_CHAR_KEYS*2) /* Parameters for open() (in register form->filestat) */ /* HA_GET_INFO does a implicit HA_ABORT_IF_LOCKED */ @@ -353,3 +354,4 @@ int ha_autocommit_or_rollback(THD *thd, int error); void ha_set_spin_retries(uint retries); bool ha_flush_logs(void); int ha_commit_rename(THD *thd); +int ha_recovery_logging(THD *thd, bool on); diff --git a/sql/lock.cc b/sql/lock.cc index 23f81c9c164..1d9aca66e74 100644 --- a/sql/lock.cc +++ b/sql/lock.cc @@ -35,6 +35,7 @@ static MYSQL_LOCK *get_lock_data(THD *thd, TABLE **table,uint count, bool unlock, TABLE **write_locked); static int lock_external(TABLE **table,uint count); static int unlock_external(THD *thd, TABLE **table,uint count); +static void print_lock_error(int error); MYSQL_LOCK *mysql_lock_tables(THD *thd,TABLE **tables,uint count) @@ -154,7 +155,7 @@ static int lock_external(TABLE **tables,uint count) (*tables)->file->external_lock(thd, F_UNLCK); (*tables)->current_lock=F_UNLCK; } - my_error(ER_CANT_LOCK,MYF(ME_BELL+ME_OLDWIN+ME_WAITTANG),error); + print_lock_error(error); DBUG_RETURN(error); } else @@ -325,7 +326,7 @@ static int unlock_external(THD *thd, TABLE **table,uint count) } } if (error_code) - my_error(ER_CANT_LOCK,MYF(ME_BELL+ME_OLDWIN+ME_WAITTANG),error_code); + print_lock_error(error_code); DBUG_RETURN(error_code); } @@ -480,3 +481,24 @@ bool wait_for_locked_table_names(THD *thd, TABLE_LIST *table_list) } DBUG_RETURN(result); } + +static void print_lock_error(int error) +{ + int textno; + 
DBUG_ENTER("print_lock_error"); + + switch (error) { + case HA_ERR_LOCK_WAIT_TIMEOUT: + textno=ER_LOCK_WAIT_TIMEOUT; + break; + case HA_ERR_READ_ONLY_TRANSACTION: + textno=ER_READ_ONLY_TRANSACTION; + break; + default: + textno=ER_CANT_LOCK; + break; + } + my_error(textno,MYF(ME_BELL+ME_OLDWIN+ME_WAITTANG),error); + DBUG_VOID_RETURN; +} + diff --git a/sql/share/czech/errmsg.txt b/sql/share/czech/errmsg.txt index 666d70c957a..35a428273c7 100644 --- a/sql/share/czech/errmsg.txt +++ b/sql/share/czech/errmsg.txt @@ -215,3 +215,6 @@ "Could not create slave thread, check system resources", "User %-.64s has already more than 'max_user_connections' active connections", "You may only use constant expressions with SET", +"Lock wait timeout exceeded", +"The total number of locks exceeds the lock table size", +"Update locks cannot be acquired during a READ UNCOMMITTED transaction", diff --git a/sql/share/danish/errmsg.txt b/sql/share/danish/errmsg.txt index 9f1f6accc1f..b2fe6c4e800 100644 --- a/sql/share/danish/errmsg.txt +++ b/sql/share/danish/errmsg.txt @@ -209,3 +209,6 @@ "Could not create slave thread, check system resources", "User %-.64s has already more than 'max_user_connections' active connections", "You may only use constant expressions with SET", +"Lock wait timeout exceeded", +"The total number of locks exceeds the lock table size", +"Update locks cannot be acquired during a READ UNCOMMITTED transaction", diff --git a/sql/share/dutch/errmsg.txt b/sql/share/dutch/errmsg.txt index 8b44af7eb7b..616f832bee8 100644 --- a/sql/share/dutch/errmsg.txt +++ b/sql/share/dutch/errmsg.txt @@ -206,3 +206,6 @@ "Could not create slave thread, check system resources", "User %-.64s has already more than 'max_user_connections' active connections", "You may only use constant expressions with SET", +"Lock wait timeout exceeded", +"The total number of locks exceeds the lock table size", +"Update locks cannot be acquired during a READ UNCOMMITTED transaction", diff --git 
a/sql/share/english/errmsg.txt b/sql/share/english/errmsg.txt index ff29fffe958..018d558d7de 100644 --- a/sql/share/english/errmsg.txt +++ b/sql/share/english/errmsg.txt @@ -206,3 +206,6 @@ "Could not create slave thread, check system resources", "User %-.64s has already more than 'max_user_connections' active connections", "You may only use constant expressions with SET", +"Lock wait timeout exceeded", +"The total number of locks exceeds the lock table size", +"Update locks cannot be acquired during a READ UNCOMMITTED transaction", diff --git a/sql/share/estonian/errmsg.txt b/sql/share/estonian/errmsg.txt index f1559f4a44d..e1e03e4a596 100644 --- a/sql/share/estonian/errmsg.txt +++ b/sql/share/estonian/errmsg.txt @@ -210,3 +210,6 @@ "Could not create slave thread, check system resources", "User %-.64s has already more than 'max_user_connections' active connections", "You may only use constant expressions with SET", +"Lock wait timeout exceeded", +"The total number of locks exceeds the lock table size", +"Update locks cannot be acquired during a READ UNCOMMITTED transaction", diff --git a/sql/share/french/errmsg.txt b/sql/share/french/errmsg.txt index 5cbcfe81b87..aadfecbc8a1 100644 --- a/sql/share/french/errmsg.txt +++ b/sql/share/french/errmsg.txt @@ -206,3 +206,6 @@ "Could not create slave thread, check system resources", "User %-.64s has already more than 'max_user_connections' active connections", "You may only use constant expressions with SET", +"Lock wait timeout exceeded", +"The total number of locks exceeds the lock table size", +"Update locks cannot be acquired during a READ UNCOMMITTED transaction", diff --git a/sql/share/german/errmsg.txt b/sql/share/german/errmsg.txt index 307ed7a00f4..7a86a4368e7 100644 --- a/sql/share/german/errmsg.txt +++ b/sql/share/german/errmsg.txt @@ -209,3 +209,6 @@ "Could not create slave thread, check system resources", "User %-.64s has already more than 'max_user_connections' active connections", "You may only use constant 
expressions with SET", +"Lock wait timeout exceeded", +"The total number of locks exceeds the lock table size", +"Update locks cannot be acquired during a READ UNCOMMITTED transaction", diff --git a/sql/share/greek/errmsg.txt b/sql/share/greek/errmsg.txt index 119de63b2a7..5022bb65792 100644 --- a/sql/share/greek/errmsg.txt +++ b/sql/share/greek/errmsg.txt @@ -206,3 +206,6 @@ "Could not create slave thread, check system resources", "User %-.64s has already more than 'max_user_connections' active connections", "You may only use constant expressions with SET", +"Lock wait timeout exceeded", +"The total number of locks exceeds the lock table size", +"Update locks cannot be acquired during a READ UNCOMMITTED transaction", diff --git a/sql/share/hungarian/errmsg.txt b/sql/share/hungarian/errmsg.txt index 7e9b9e6a3bf..cfdd4b7fe75 100644 --- a/sql/share/hungarian/errmsg.txt +++ b/sql/share/hungarian/errmsg.txt @@ -208,3 +208,6 @@ "Could not create slave thread, check system resources", "User %-.64s has already more than 'max_user_connections' active connections", "You may only use constant expressions with SET", +"Lock wait timeout exceeded", +"The total number of locks exceeds the lock table size", +"Update locks cannot be acquired during a READ UNCOMMITTED transaction", diff --git a/sql/share/italian/errmsg.txt b/sql/share/italian/errmsg.txt index d6c857d44a4..d1b17bc8f2e 100644 --- a/sql/share/italian/errmsg.txt +++ b/sql/share/italian/errmsg.txt @@ -206,3 +206,6 @@ "Could not create slave thread, check system resources", "User %-.64s has already more than 'max_user_connections' active connections", "You may only use constant expressions with SET", +"Lock wait timeout exceeded", +"The total number of locks exceeds the lock table size", +"Update locks cannot be acquired during a READ UNCOMMITTED transaction", diff --git a/sql/share/japanese/errmsg.txt b/sql/share/japanese/errmsg.txt index a62f22c253d..9dfe9bb3efb 100644 --- a/sql/share/japanese/errmsg.txt +++ 
b/sql/share/japanese/errmsg.txt @@ -208,3 +208,6 @@ "Could not create slave thread, check system resources", "User %-.64s has already more than 'max_user_connections' active connections", "You may only use constant expressions with SET", +"Lock wait timeout exceeded", +"The total number of locks exceeds the lock table size", +"Update locks cannot be acquired during a READ UNCOMMITTED transaction", diff --git a/sql/share/korean/errmsg.txt b/sql/share/korean/errmsg.txt index c476ad8fa3c..4f0f90f88ce 100644 --- a/sql/share/korean/errmsg.txt +++ b/sql/share/korean/errmsg.txt @@ -206,3 +206,6 @@ "Could not create slave thread, check system resources", "User %-.64s has already more than 'max_user_connections' active connections", "You may only use constant expressions with SET", +"Lock wait timeout exceeded", +"The total number of locks exceeds the lock table size", +"Update locks cannot be acquired during a READ UNCOMMITTED transaction", diff --git a/sql/share/norwegian-ny/errmsg.txt b/sql/share/norwegian-ny/errmsg.txt index 2a57c93cc84..99238d61e3e 100644 --- a/sql/share/norwegian-ny/errmsg.txt +++ b/sql/share/norwegian-ny/errmsg.txt @@ -208,3 +208,6 @@ "Could not create slave thread, check system resources", "User %-.64s has already more than 'max_user_connections' active connections", "You may only use constant expressions with SET", +"Lock wait timeout exceeded", +"The total number of locks exceeds the lock table size", +"Update locks cannot be acquired during a READ UNCOMMITTED transaction", diff --git a/sql/share/norwegian/errmsg.txt b/sql/share/norwegian/errmsg.txt index cf23991eefa..473d297b649 100644 --- a/sql/share/norwegian/errmsg.txt +++ b/sql/share/norwegian/errmsg.txt @@ -208,3 +208,6 @@ "Could not create slave thread, check system resources", "User %-.64s has already more than 'max_user_connections' active connections", "You may only use constant expressions with SET", +"Lock wait timeout exceeded", +"The total number of locks exceeds the lock table 
size", +"Update locks cannot be acquired during a READ UNCOMMITTED transaction", diff --git a/sql/share/polish/errmsg.txt b/sql/share/polish/errmsg.txt index 03e9d59dacd..253d4afd2b7 100644 --- a/sql/share/polish/errmsg.txt +++ b/sql/share/polish/errmsg.txt @@ -210,3 +210,6 @@ "Could not create slave thread, check system resources", "User %-.64s has already more than 'max_user_connections' active connections", "You may only use constant expressions with SET", +"Lock wait timeout exceeded", +"The total number of locks exceeds the lock table size", +"Update locks cannot be acquired during a READ UNCOMMITTED transaction", diff --git a/sql/share/portuguese/errmsg.txt b/sql/share/portuguese/errmsg.txt index 37f2bf9e7ac..ba010a20710 100644 --- a/sql/share/portuguese/errmsg.txt +++ b/sql/share/portuguese/errmsg.txt @@ -206,3 +206,6 @@ "Could not create slave thread, check system resources", "User %-.64s has already more than 'max_user_connections' active connections", "You may only use constant expressions with SET", +"Lock wait timeout exceeded", +"The total number of locks exceeds the lock table size", +"Update locks cannot be acquired during a READ UNCOMMITTED transaction", diff --git a/sql/share/romanian/errmsg.txt b/sql/share/romanian/errmsg.txt index 6bc2695bed5..384df0c864e 100644 --- a/sql/share/romanian/errmsg.txt +++ b/sql/share/romanian/errmsg.txt @@ -210,3 +210,6 @@ "Could not create slave thread, check system resources", "User %-.64s has already more than 'max_user_connections' active connections", "You may only use constant expressions with SET", +"Lock wait timeout exceeded", +"The total number of locks exceeds the lock table size", +"Update locks cannot be acquired during a READ UNCOMMITTED transaction", diff --git a/sql/share/russian/errmsg.txt b/sql/share/russian/errmsg.txt index 75d21dda888..7dd24c743bb 100644 --- a/sql/share/russian/errmsg.txt +++ b/sql/share/russian/errmsg.txt @@ -209,3 +209,6 @@ "îÅ ÍÏÇÕ ÓÏÚÄÁÔØ ÐÒÏÃÅÓÓ SLAVE, ÐÒÏ×ÅÒØÔÅ ÓÉÓÔÅÍÎÙÅ 
ÒÅÓÕÒÓÙ", "User %-.64s has already more than 'max_user_connections' active connections", "You may only use constant expressions with SET", +"Lock wait timeout exceeded", +"The total number of locks exceeds the lock table size", +"Update locks cannot be acquired during a READ UNCOMMITTED transaction", diff --git a/sql/share/slovak/errmsg.txt b/sql/share/slovak/errmsg.txt index 673499f5105..2a6063b6aee 100644 --- a/sql/share/slovak/errmsg.txt +++ b/sql/share/slovak/errmsg.txt @@ -214,3 +214,6 @@ "Could not create slave thread, check system resources", "User %-.64s has already more than 'max_user_connections' active connections", "You may only use constant expressions with SET", +"Lock wait timeout exceeded", +"The total number of locks exceeds the lock table size", +"Update locks cannot be acquired during a READ UNCOMMITTED transaction", diff --git a/sql/share/spanish/errmsg.txt b/sql/share/spanish/errmsg.txt index d470556fd58..dbf7caf585d 100644 --- a/sql/share/spanish/errmsg.txt +++ b/sql/share/spanish/errmsg.txt @@ -207,3 +207,6 @@ "Could not create slave thread, check system resources", "User %-.64s has already more than 'max_user_connections' active connections", "You may only use constant expressions with SET", +"Lock wait timeout exceeded", +"The total number of locks exceeds the lock table size", +"Update locks cannot be acquired during a READ UNCOMMITTED transaction", diff --git a/sql/share/swedish/errmsg.txt b/sql/share/swedish/errmsg.txt index 672ce97c575..fc26a08e9ee 100644 --- a/sql/share/swedish/errmsg.txt +++ b/sql/share/swedish/errmsg.txt @@ -206,3 +206,6 @@ "Kunde inte starta en tråd för replikering", "Användare '%-.64s' har redan 'max_user_connections' aktiva inloggningar", "Du kan endast använda konstant-uttryck med SET", +"Lock wait timeout exceeded", +"The total number of locks exceeds the lock table size", +"Update locks cannot be acquired during a READ UNCOMMITTED transaction", diff --git a/sql/sql_base.cc b/sql/sql_base.cc index 
e7d63e1e5e4..d9470ee0b59 100644 --- a/sql/sql_base.cc +++ b/sql/sql_base.cc @@ -1388,11 +1388,6 @@ TABLE *open_ltable(THD *thd, TABLE_LIST *table_list, thr_lock_type lock_type) bool refresh; DBUG_ENTER("open_ltable"); -#ifdef __WIN__ - /* Win32 can't drop a file that is open */ - if (lock_type == TL_WRITE_ALLOW_READ) - lock_type= TL_WRITE; -#endif thd->proc_info="Opening table"; while (!(table=open_table(thd,table_list->db ? table_list->db : thd->db, table_list->real_name,table_list->name, @@ -1400,6 +1395,19 @@ TABLE *open_ltable(THD *thd, TABLE_LIST *table_list, thr_lock_type lock_type) if (table) { int error; + +#ifdef __WIN__ + /* Win32 can't drop a file that is open */ + if (lock_type == TL_WRITE_ALLOW_READ +#ifdef HAVE_GEMINI_DB + && table->db_type != DB_TYPE_GEMINI +#endif /* HAVE_GEMINI_DB */ + ) + { + lock_type= TL_WRITE; + } +#endif /* __WIN__ */ + table_list->table=table; table->grant= table_list->grant; if (thd->locked_tables) diff --git a/sql/sql_table.cc b/sql/sql_table.cc index ad39b91a5ca..bd7c82d3e26 100644 --- a/sql/sql_table.cc +++ b/sql/sql_table.cc @@ -423,6 +423,13 @@ int mysql_create_table(THD *thd,const char *db, const char *table_name, column->field_name); DBUG_RETURN(-1); } + if (key->type == Key::FULLTEXT && + (file->option_flag() & HA_NO_FULLTEXT_KEY)) + { + my_printf_error(ER_WRONG_KEY_COLUMN, ER(ER_WRONG_KEY_COLUMN), MYF(0), + column->field_name); + DBUG_RETURN(-1); + } if (f_is_blob(sql_field->pack_flag)) { if (!(file->option_flag() & HA_BLOB_KEY)) @@ -1678,6 +1685,16 @@ copy_data_between_tables(TABLE *from,TABLE *to, goto err; }; + /* Turn off recovery logging since rollback of an + alter table is to delete the new table so there + is no need to log the changes to it. 
*/ + error = ha_recovery_logging(thd,false); + if(error) + { + error = 1; + goto err; + } + init_read_record(&info, thd, from, (SQL_SELECT *) 0, 1,1); if (handle_duplicates == DUP_IGNORE || handle_duplicates == DUP_REPLACE) @@ -1723,6 +1740,7 @@ copy_data_between_tables(TABLE *from,TABLE *to, if (to->file->activate_all_index(thd)) error=1; + tmp_error = ha_recovery_logging(thd,true); /* Ensure that the new table is saved properly to disk so that we can do a rename @@ -1734,6 +1752,7 @@ copy_data_between_tables(TABLE *from,TABLE *to, if (to->file->external_lock(thd,F_UNLCK)) error=1; err: + tmp_error = ha_recovery_logging(thd,true); free_io_cache(from); *copied= found_count; *deleted=delete_count; |