author     unknown <acurtis@xiphis.org>	2006-04-24 13:36:15 -0700
committer  unknown <acurtis@xiphis.org>	2006-04-24 13:36:15 -0700
commit     1260bc887358d3fc4c060b21eb1c755c3f5dee69 (patch)
tree       8d426c8740820381d4c07c02035f4a4bf29c2fa3 /storage/archive
parent     3f63edb362e89df761e575842ac39ef4c86a664c (diff)
parent     5521b1bb9e5c7fb086feb9c731873b5874ccce0a (diff)
download   mariadb-git-1260bc887358d3fc4c060b21eb1c755c3f5dee69.tar.gz
Merge xiphis.org:/home/antony/work2/wl3201.3
into xiphis.org:/home/antony/work2/wl3201.merge
BUILD/SETUP.sh:
Auto merged
libmysqld/Makefile.am:
Auto merged
sql/ha_heap.cc:
Auto merged
sql/ha_innodb.cc:
Auto merged
sql/ha_myisammrg.cc:
Auto merged
sql/ha_ndbcluster.cc:
Auto merged
sql/ha_ndbcluster_binlog.cc:
Auto merged
sql/ha_partition.cc:
Auto merged
sql/handler.cc:
Auto merged
sql/handler.h:
Auto merged
sql/log.cc:
Auto merged
sql/mysqld.cc:
Auto merged
sql/partition_info.cc:
Auto merged
sql/sql_plugin.cc:
Auto merged
sql/sql_yacc.yy:
Auto merged
storage/archive/ha_archive.cc:
Auto merged
storage/archive/ha_archive.h:
Auto merged
storage/example/ha_example.cc:
Auto merged
Makefile.am:
Merge for WL#3201, some post-merge fixes will be required.
configure.in:
Merge for WL#3201
plugin/Makefile.am:
Merge for WL#3201
storage/innobase/Makefile.am:
Merge for WL#3201
Diffstat (limited to 'storage/archive')
-rw-r--r--  storage/archive/Makefile.am    |   50
-rw-r--r--  storage/archive/cmakelists.txt |    2
-rw-r--r--  storage/archive/ha_archive.cc  | 1572
-rw-r--r--  storage/archive/ha_archive.h   |  139
4 files changed, 1751 insertions, 12 deletions
diff --git a/storage/archive/Makefile.am b/storage/archive/Makefile.am index 415e0dc8f8f..0920fe1a897 100644 --- a/storage/archive/Makefile.am +++ b/storage/archive/Makefile.am @@ -14,20 +14,48 @@ # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -INCLUDES = -I$(top_builddir)/include -I$(top_srcdir)/include \ - @ZLIB_INCLUDES@ +#called from the top level Makefile -LDADD = libarchive.a \ - $(top_builddir)/mysys/libmysys.a \ - $(top_builddir)/dbug/libdbug.a \ - $(top_builddir)/strings/libmystrings.a \ - @ZLIB_LIBS@ -pkglib_LIBRARIES = libarchive.a +MYSQLDATAdir = $(localstatedir) +MYSQLSHAREdir = $(pkgdatadir) +MYSQLBASEdir= $(prefix) +MYSQLLIBdir= $(pkglibdir) +INCLUDES = -I$(top_srcdir)/include \ + -I$(top_srcdir)/regex \ + -I$(top_srcdir)/sql \ + -I$(srcdir) @ZLIB_INCLUDES@ +WRAPLIBS= + +LDADD = + +DEFS = @DEFS@ + +noinst_HEADERS = ha_archive.h azlib.h noinst_PROGRAMS = archive_test + +EXTRA_LTLIBRARIES = ha_archive.la +pkglib_LTLIBRARIES = @plugin_archive_shared_target@ +ha_archive_la_LDFLAGS = -module -rpath $(MYSQLLIBdir) +ha_archive_la_CXXFLAGS= $(AM_CFLAGS) -DMYSQL_DYNAMIC_PLUGIN +ha_archive_la_CFLAGS = $(AM_CFLAGS) -DMYSQL_DYNAMIC_PLUGIN +ha_archive_la_SOURCES = ha_archive.cc azio.c + + +EXTRA_LIBRARIES = libarchive.a +noinst_LIBRARIES = @plugin_archive_static_target@ +libarchive_a_CXXFLAGS = $(AM_CFLAGS) +libarchive_a_CFLAGS = $(AM_CFLAGS) +libarchive_a_SOURCES = ha_archive.cc azio.c + + +archive_test_SOURCES = archive_test.c azio.c +archive_test_CFLAGS = $(AM_CFLAGS) +archive_test_LDADD = $(top_srcdir)/mysys/libmysys.a \ + $(top_srcdir)/dbug/libdbug.a \ + $(top_srcdir)/strings/libmystrings.a \ + @ZLIB_LIBS@ archive_test_LDFLAGS = @NOINST_LDFLAGS@ -noinst_HEADERS = azlib.h -libarchive_a_SOURCES = azio.c -EXTRA_DIST = cmakelists.txt + # Don't update the files from bitkeeper %::SCCS/s.% diff --git a/storage/archive/cmakelists.txt b/storage/archive/cmakelists.txt index 4189781e73a..939f5562d50 100644 --- a/storage/archive/cmakelists.txt +++ b/storage/archive/cmakelists.txt @@ -2,5 +2,5 @@ SET(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -DSAFEMALLOC -DSAFE_MUTEX") SET(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -DSAFEMALLOC -DSAFE_MUTEX") INCLUDE_DIRECTORIES(${CMAKE_SOURCE_DIR}/include ${CMAKE_SOURCE_DIR}/zlib) -ADD_LIBRARY(archive azio.c) +ADD_LIBRARY(archive azio.c ha_archive.cc ha_archive.h) TARGET_LINK_LIBRARIES(archive zlib mysys dbug strings) diff --git a/storage/archive/ha_archive.cc b/storage/archive/ha_archive.cc new file mode 100644 index 00000000000..32feff288f6 --- /dev/null +++ b/storage/archive/ha_archive.cc @@ -0,0 +1,1572 @@ +/* Copyright (C) 2003 MySQL AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#ifdef USE_PRAGMA_IMPLEMENTATION +#pragma implementation // gcc: Class implementation +#endif + +#include "mysql_priv.h" +#include <myisam.h> + +#include "ha_archive.h" +#include <my_dir.h> + +#include <mysql/plugin.h> + +/* + First, if you want to understand storage engines you should look at + ha_example.cc and ha_example.h. + This example was written as a test case for a customer who needed + a storage engine without indexes that could compress data very well. + So, welcome to a completely compressed storage engine. This storage + engine only does inserts. No replace, deletes, or updates. All reads are + complete table scans. Compression is done through azip (bzip compresses + better, but only marginally, if someone asks I could add support for + it too, but beaware that it costs a lot more in CPU time then azip). + + We keep a file pointer open for each instance of ha_archive for each read + but for writes we keep one open file handle just for that. We flush it + only if we have a read occur. azip handles compressing lots of records + at once much better then doing lots of little records between writes. + It is possible to not lock on writes but this would then mean we couldn't + handle bulk inserts as well (that is if someone was trying to read at + the same time since we would want to flush). + + A "meta" file is kept alongside the data file. This file serves two purpose. + The first purpose is to track the number of rows in the table. The second + purpose is to determine if the table was closed properly or not. When the + meta file is first opened it is marked as dirty. It is opened when the table + itself is opened for writing. When the table is closed the new count for rows + is written to the meta file and the file is marked as clean. If the meta file + is opened and it is marked as dirty, it is assumed that a crash occured. At + this point an error occurs and the user is told to rebuild the file. + A rebuild scans the rows and rewrites the meta file. If corruption is found + in the data file then the meta file is not repaired. + + At some point a recovery method for such a drastic case needs to be divised. + + Locks are row level, and you will get a consistant read. + + For performance as far as table scans go it is quite fast. I don't have + good numbers but locally it has out performed both Innodb and MyISAM. For + Innodb the question will be if the table can be fit into the buffer + pool. For MyISAM its a question of how much the file system caches the + MyISAM file. With enough free memory MyISAM is faster. Its only when the OS + doesn't have enough memory to cache entire table that archive turns out + to be any faster. + + Examples between MyISAM (packed) and Archive. + + Table with 76695844 identical rows: + 29680807 a_archive.ARZ + 920350317 a.MYD + + + Table with 8991478 rows (all of Slashdot's comments): + 1922964506 comment_archive.ARZ + 2944970297 comment_text.MYD + + + TODO: + Add bzip optional support. + Allow users to set compression level. + Implement versioning, should be easy. + Allow for errors, find a way to mark bad rows. + Add optional feature so that rows can be flushed at interval (which will cause less + compression but may speed up ordered searches). + Checkpoint the meta file to allow for faster rebuilds. 
+ Dirty open (right now the meta file is repaired if a crash occured). + Option to allow for dirty reads, this would lower the sync calls, which would make + inserts a lot faster, but would mean highly arbitrary reads. + + -Brian +*/ +/* + Notes on file formats. + The Meta file is layed out as: + check - Just an int of 254 to make sure that the the file we are opening was + never corrupted. + version - The current version of the file format. + rows - This is an unsigned long long which is the number of rows in the data + file. + check point - Reserved for future use + auto increment - MAX value for autoincrement + dirty - Status of the file, whether or not its values are the latest. This + flag is what causes a repair to occur + + The data file: + check - Just an int of 254 to make sure that the the file we are opening was + never corrupted. + version - The current version of the file format. + data - The data is stored in a "row +blobs" format. +*/ + +/* If the archive storage engine has been inited */ +static bool archive_inited= FALSE; +/* Variables for archive share methods */ +pthread_mutex_t archive_mutex; +static HASH archive_open_tables; + +/* The file extension */ +#define ARZ ".ARZ" // The data file +#define ARN ".ARN" // Files used during an optimize call +#define ARM ".ARM" // Meta file +/* + uchar + uchar + ulonglong + ulonglong + ulonglong + ulonglong + FN_REFLEN + + uchar +*/ +#define META_BUFFER_SIZE sizeof(uchar) + sizeof(uchar) + sizeof(ulonglong) \ + + sizeof(ulonglong) + sizeof(ulonglong) + sizeof(ulonglong) + FN_REFLEN \ + + sizeof(uchar) + +/* + uchar + uchar +*/ +#define DATA_BUFFER_SIZE 2 // Size of the data used in the data file +#define ARCHIVE_CHECK_HEADER 254 // The number we use to determine corruption + +/* Static declarations for handerton */ +static handler *archive_create_handler(TABLE_SHARE *table); +/* + Number of rows that will force a bulk insert. +*/ +#define ARCHIVE_MIN_ROWS_TO_USE_BULK_INSERT 2 + + +/* dummy handlerton - only to have something to return from archive_db_init */ +handlerton archive_hton = { + MYSQL_HANDLERTON_INTERFACE_VERSION, + "ARCHIVE", + SHOW_OPTION_YES, + "Archive storage engine", + DB_TYPE_ARCHIVE_DB, + archive_db_init, + 0, /* slot */ + 0, /* savepoint size. */ + NULL, /* close_connection */ + NULL, /* savepoint */ + NULL, /* rollback to savepoint */ + NULL, /* releas savepoint */ + NULL, /* commit */ + NULL, /* rollback */ + NULL, /* prepare */ + NULL, /* recover */ + NULL, /* commit_by_xid */ + NULL, /* rollback_by_xid */ + NULL, /* create_cursor_read_view */ + NULL, /* set_cursor_read_view */ + NULL, /* close_cursor_read_view */ + archive_create_handler, /* Create a new handler */ + NULL, /* Drop a database */ + archive_db_end, /* Panic call */ + NULL, /* Start Consistent Snapshot */ + NULL, /* Flush logs */ + NULL, /* Show status */ + NULL, /* Partition flags */ + NULL, /* Alter table flags */ + NULL, /* Alter interface */ + NULL, /* fill_files_table */ + HTON_NO_FLAGS, + NULL, /* binlog_func */ + NULL, /* binlog_log_query */ + NULL /* release_temporary_latches */ + +}; + +static handler *archive_create_handler(TABLE_SHARE *table) +{ + return new ha_archive(table); +} + +/* + Used for hash table that tracks open tables. +*/ +static byte* archive_get_key(ARCHIVE_SHARE *share,uint *length, + my_bool not_used __attribute__((unused))) +{ + *length=share->table_name_length; + return (byte*) share->table_name; +} + + +/* + Initialize the archive handler. 
+ + SYNOPSIS + archive_db_init() + void + + RETURN + FALSE OK + TRUE Error +*/ + +bool archive_db_init() +{ + DBUG_ENTER("archive_db_init"); + if (archive_inited) + DBUG_RETURN(FALSE); + if (pthread_mutex_init(&archive_mutex, MY_MUTEX_INIT_FAST)) + goto error; + if (hash_init(&archive_open_tables, system_charset_info, 32, 0, 0, + (hash_get_key) archive_get_key, 0, 0)) + { + VOID(pthread_mutex_destroy(&archive_mutex)); + } + else + { + archive_inited= TRUE; + DBUG_RETURN(FALSE); + } +error: + DBUG_RETURN(TRUE); +} + +/* + Release the archive handler. + + SYNOPSIS + archive_db_done() + void + + RETURN + FALSE OK +*/ + +int archive_db_done() +{ + if (archive_inited) + { + hash_free(&archive_open_tables); + VOID(pthread_mutex_destroy(&archive_mutex)); + } + archive_inited= 0; + return 0; +} + + +int archive_db_end(ha_panic_function type) +{ + return archive_db_done(); +} + +ha_archive::ha_archive(TABLE_SHARE *table_arg) + :handler(&archive_hton, table_arg), delayed_insert(0), bulk_insert(0) +{ + /* Set our original buffer from pre-allocated memory */ + buffer.set((char *)byte_buffer, IO_SIZE, system_charset_info); + + /* The size of the offset value we will use for position() */ + ref_length = sizeof(my_off_t); +} + +/* + This method reads the header of a datafile and returns whether or not it was successful. +*/ +int ha_archive::read_data_header(azio_stream *file_to_read) +{ + uchar data_buffer[DATA_BUFFER_SIZE]; + DBUG_ENTER("ha_archive::read_data_header"); + + if (azrewind(file_to_read) == -1) + DBUG_RETURN(HA_ERR_CRASHED_ON_USAGE); + + if (azread(file_to_read, data_buffer, DATA_BUFFER_SIZE) != DATA_BUFFER_SIZE) + DBUG_RETURN(errno ? errno : -1); + + DBUG_PRINT("ha_archive::read_data_header", ("Check %u", data_buffer[0])); + DBUG_PRINT("ha_archive::read_data_header", ("Version %u", data_buffer[1])); + + if ((data_buffer[0] != (uchar)ARCHIVE_CHECK_HEADER) && + (data_buffer[1] != (uchar)ARCHIVE_VERSION)) + DBUG_RETURN(HA_ERR_CRASHED_ON_USAGE); + + DBUG_RETURN(0); +} + +/* + This method writes out the header of a datafile and returns whether or not it was successful. +*/ +int ha_archive::write_data_header(azio_stream *file_to_write) +{ + uchar data_buffer[DATA_BUFFER_SIZE]; + DBUG_ENTER("ha_archive::write_data_header"); + + data_buffer[0]= (uchar)ARCHIVE_CHECK_HEADER; + data_buffer[1]= (uchar)ARCHIVE_VERSION; + + if (azwrite(file_to_write, &data_buffer, DATA_BUFFER_SIZE) != + DATA_BUFFER_SIZE) + goto error; + DBUG_PRINT("ha_archive::write_data_header", ("Check %u", (uint)data_buffer[0])); + DBUG_PRINT("ha_archive::write_data_header", ("Version %u", (uint)data_buffer[1])); + + DBUG_RETURN(0); +error: + DBUG_RETURN(errno); +} + +/* + This method reads the header of a meta file and returns whether or not it was successful. + *rows will contain the current number of rows in the data file upon success. 
+*/ +int ha_archive::read_meta_file(File meta_file, ha_rows *rows, + ulonglong *auto_increment, + ulonglong *forced_flushes, + char *real_path) +{ + uchar meta_buffer[META_BUFFER_SIZE]; + uchar *ptr= meta_buffer; + ulonglong check_point; + + DBUG_ENTER("ha_archive::read_meta_file"); + + VOID(my_seek(meta_file, 0, MY_SEEK_SET, MYF(0))); + if (my_read(meta_file, (byte*)meta_buffer, META_BUFFER_SIZE, 0) != META_BUFFER_SIZE) + DBUG_RETURN(-1); + + /* + Parse out the meta data, we ignore version at the moment + */ + + ptr+= sizeof(uchar)*2; // Move past header + *rows= (ha_rows)uint8korr(ptr); + ptr+= sizeof(ulonglong); // Move past rows + check_point= uint8korr(ptr); + ptr+= sizeof(ulonglong); // Move past check_point + *auto_increment= uint8korr(ptr); + ptr+= sizeof(ulonglong); // Move past auto_increment + *forced_flushes= uint8korr(ptr); + ptr+= sizeof(ulonglong); // Move past forced_flush + memmove(real_path, ptr, FN_REFLEN); + ptr+= FN_REFLEN; // Move past the possible location of the file + + DBUG_PRINT("ha_archive::read_meta_file", ("Check %d", (uint)meta_buffer[0])); + DBUG_PRINT("ha_archive::read_meta_file", ("Version %d", (uint)meta_buffer[1])); + DBUG_PRINT("ha_archive::read_meta_file", ("Rows %llu", *rows)); + DBUG_PRINT("ha_archive::read_meta_file", ("Checkpoint %llu", check_point)); + DBUG_PRINT("ha_archive::read_meta_file", ("Auto-Increment %llu", *auto_increment)); + DBUG_PRINT("ha_archive::read_meta_file", ("Forced Flushes %llu", *forced_flushes)); + DBUG_PRINT("ha_archive::read_meta_file", ("Real Path %s", real_path)); + DBUG_PRINT("ha_archive::read_meta_file", ("Dirty %d", (int)(*ptr))); + + if ((meta_buffer[0] != (uchar)ARCHIVE_CHECK_HEADER) || + ((bool)(*ptr)== TRUE)) + DBUG_RETURN(HA_ERR_CRASHED_ON_USAGE); + + my_sync(meta_file, MYF(MY_WME)); + + DBUG_RETURN(0); +} + +/* + This method writes out the header of a meta file and returns whether or not it was successful. + By setting dirty you say whether or not the file represents the actual state of the data file. + Upon ::open() we set to dirty, and upon ::close() we set to clean. 
+*/ +int ha_archive::write_meta_file(File meta_file, ha_rows rows, + ulonglong auto_increment, + ulonglong forced_flushes, + char *real_path, + bool dirty) +{ + uchar meta_buffer[META_BUFFER_SIZE]; + uchar *ptr= meta_buffer; + ulonglong check_point= 0; //Reserved for the future + + DBUG_ENTER("ha_archive::write_meta_file"); + + *ptr= (uchar)ARCHIVE_CHECK_HEADER; + ptr += sizeof(uchar); + *ptr= (uchar)ARCHIVE_VERSION; + ptr += sizeof(uchar); + int8store(ptr, (ulonglong)rows); + ptr += sizeof(ulonglong); + int8store(ptr, check_point); + ptr += sizeof(ulonglong); + int8store(ptr, auto_increment); + ptr += sizeof(ulonglong); + int8store(ptr, forced_flushes); + ptr += sizeof(ulonglong); + // No matter what, we pad with nulls + if (real_path) + strncpy((char *)ptr, real_path, FN_REFLEN); + else + bzero(ptr, FN_REFLEN); + ptr += FN_REFLEN; + *ptr= (uchar)dirty; + DBUG_PRINT("ha_archive::write_meta_file", ("Check %d", + (uint)ARCHIVE_CHECK_HEADER)); + DBUG_PRINT("ha_archive::write_meta_file", ("Version %d", + (uint)ARCHIVE_VERSION)); + DBUG_PRINT("ha_archive::write_meta_file", ("Rows %llu", (ulonglong)rows)); + DBUG_PRINT("ha_archive::write_meta_file", ("Checkpoint %llu", check_point)); + DBUG_PRINT("ha_archive::write_meta_file", ("Auto Increment %llu", + auto_increment)); + DBUG_PRINT("ha_archive::write_meta_file", ("Forced Flushes %llu", + forced_flushes)); + DBUG_PRINT("ha_archive::write_meta_file", ("Real path %s", + real_path)); + DBUG_PRINT("ha_archive::write_meta_file", ("Dirty %d", (uint)dirty)); + + VOID(my_seek(meta_file, 0, MY_SEEK_SET, MYF(0))); + if (my_write(meta_file, (byte *)meta_buffer, META_BUFFER_SIZE, 0) != META_BUFFER_SIZE) + DBUG_RETURN(-1); + + my_sync(meta_file, MYF(MY_WME)); + + DBUG_RETURN(0); +} + + +/* + We create the shared memory space that we will use for the open table. + No matter what we try to get or create a share. This is so that a repair + table operation can occur. + + See ha_example.cc for a longer description. +*/ +ARCHIVE_SHARE *ha_archive::get_share(const char *table_name, + TABLE *table, int *rc) +{ + ARCHIVE_SHARE *share; + char meta_file_name[FN_REFLEN]; + uint length; + char *tmp_name; + DBUG_ENTER("ha_archive::get_share"); + + pthread_mutex_lock(&archive_mutex); + length=(uint) strlen(table_name); + + if (!(share=(ARCHIVE_SHARE*) hash_search(&archive_open_tables, + (byte*) table_name, + length))) + { + if (!my_multi_malloc(MYF(MY_WME | MY_ZEROFILL), + &share, sizeof(*share), + &tmp_name, length+1, + NullS)) + { + pthread_mutex_unlock(&archive_mutex); + *rc= HA_ERR_OUT_OF_MEM; + DBUG_RETURN(NULL); + } + + share->use_count= 0; + share->table_name_length= length; + share->table_name= tmp_name; + share->crashed= FALSE; + fn_format(share->data_file_name, table_name, "", + ARZ,MY_REPLACE_EXT|MY_UNPACK_FILENAME); + fn_format(meta_file_name, table_name, "", ARM, + MY_REPLACE_EXT|MY_UNPACK_FILENAME); + DBUG_PRINT("info", ("archive opening (1) up write at %s", + share->data_file_name)); + strmov(share->table_name,table_name); + /* + We will use this lock for rows. + */ + VOID(pthread_mutex_init(&share->mutex,MY_MUTEX_INIT_FAST)); + if ((share->meta_file= my_open(meta_file_name, O_RDWR, MYF(0))) == -1) + share->crashed= TRUE; + DBUG_PRINT("info", ("archive opening (1) up write at %s", + share->data_file_name)); + + /* + After we read, we set the file to dirty. When we close, we will do the + opposite. If the meta file will not open we assume it is crashed and + leave it up to the user to fix. 
+ */ + if (read_meta_file(share->meta_file, &share->rows_recorded, + &share->auto_increment_value, + &share->forced_flushes, + share->real_path)) + share->crashed= TRUE; + else + (void)write_meta_file(share->meta_file, share->rows_recorded, + share->auto_increment_value, + share->forced_flushes, + share->real_path, + TRUE); + /* + Since we now possibly no real_path, we will use it instead if it exists. + */ + if (*share->real_path) + fn_format(share->data_file_name, share->real_path, "", ARZ, + MY_REPLACE_EXT|MY_UNPACK_FILENAME); + /* + It is expensive to open and close the data files and since you can't have + a gzip file that can be both read and written we keep a writer open + that is shared amoung all open tables. + */ + if (!(azopen(&(share->archive_write), share->data_file_name, + O_WRONLY|O_APPEND|O_BINARY))) + { + DBUG_PRINT("info", ("Could not open archive write file")); + share->crashed= TRUE; + } + VOID(my_hash_insert(&archive_open_tables, (byte*) share)); + thr_lock_init(&share->lock); + } + share->use_count++; + DBUG_PRINT("info", ("archive table %.*s has %d open handles now", + share->table_name_length, share->table_name, + share->use_count)); + if (share->crashed) + *rc= HA_ERR_CRASHED_ON_USAGE; + pthread_mutex_unlock(&archive_mutex); + + DBUG_RETURN(share); +} + + +/* + Free the share. + See ha_example.cc for a description. +*/ +int ha_archive::free_share(ARCHIVE_SHARE *share) +{ + int rc= 0; + DBUG_ENTER("ha_archive::free_share"); + DBUG_PRINT("info", ("archive table %.*s has %d open handles on entrance", + share->table_name_length, share->table_name, + share->use_count)); + + pthread_mutex_lock(&archive_mutex); + if (!--share->use_count) + { + hash_delete(&archive_open_tables, (byte*) share); + thr_lock_delete(&share->lock); + VOID(pthread_mutex_destroy(&share->mutex)); + /* + We need to make sure we don't reset the crashed state. + If we open a crashed file, wee need to close it as crashed unless + it has been repaired. + Since we will close the data down after this, we go on and count + the flush on close; + */ + share->forced_flushes++; + (void)write_meta_file(share->meta_file, share->rows_recorded, + share->auto_increment_value, + share->forced_flushes, + share->real_path, + share->crashed ? TRUE :FALSE); + if (azclose(&(share->archive_write))) + rc= 1; + if (my_close(share->meta_file, MYF(0))) + rc= 1; + my_free((gptr) share, MYF(0)); + } + pthread_mutex_unlock(&archive_mutex); + + DBUG_RETURN(rc); +} + + +/* + We just implement one additional file extension. +*/ +static const char *ha_archive_exts[] = { + ARZ, + ARM, + NullS +}; + +const char **ha_archive::bas_ext() const +{ + return ha_archive_exts; +} + + +/* + When opening a file we: + Create/get our shared structure. + Init out lock. + We open the file we will read from. +*/ +int ha_archive::open(const char *name, int mode, uint open_options) +{ + int rc= 0; + DBUG_ENTER("ha_archive::open"); + + DBUG_PRINT("info", ("archive table was opened for crash: %s", + (open_options & HA_OPEN_FOR_REPAIR) ? 
"yes" : "no")); + share= get_share(name, table, &rc); + + if (rc == HA_ERR_CRASHED_ON_USAGE && !(open_options & HA_OPEN_FOR_REPAIR)) + { + free_share(share); + DBUG_RETURN(rc); + } + else if (rc == HA_ERR_OUT_OF_MEM) + { + DBUG_RETURN(rc); + } + + thr_lock_data_init(&share->lock,&lock,NULL); + + DBUG_PRINT("info", ("archive data_file_name %s", share->data_file_name)); + if (!(azopen(&archive, share->data_file_name, O_RDONLY|O_BINARY))) + { + if (errno == EROFS || errno == EACCES) + DBUG_RETURN(my_errno= errno); + DBUG_RETURN(HA_ERR_CRASHED_ON_USAGE); + } + + DBUG_PRINT("info", ("archive table was crashed %s", + rc == HA_ERR_CRASHED_ON_USAGE ? "yes" : "no")); + if (rc == HA_ERR_CRASHED_ON_USAGE && open_options & HA_OPEN_FOR_REPAIR) + { + DBUG_RETURN(0); + } + else + DBUG_RETURN(rc); +} + + +/* + Closes the file. + + SYNOPSIS + close(); + + IMPLEMENTATION: + + We first close this storage engines file handle to the archive and + then remove our reference count to the table (and possibly free it + as well). + + RETURN + 0 ok + 1 Error +*/ + +int ha_archive::close(void) +{ + int rc= 0; + DBUG_ENTER("ha_archive::close"); + + /* First close stream */ + if (azclose(&archive)) + rc= 1; + /* then also close share */ + rc|= free_share(share); + + DBUG_RETURN(rc); +} + + +/* + We create our data file here. The format is pretty simple. + You can read about the format of the data file above. + Unlike other storage engines we do not "pack" our data. Since we + are about to do a general compression, packing would just be a waste of + CPU time. If the table has blobs they are written after the row in the order + of creation. +*/ + +int ha_archive::create(const char *name, TABLE *table_arg, + HA_CREATE_INFO *create_info) +{ + File create_file; // We use to create the datafile and the metafile + char name_buff[FN_REFLEN]; + int error; + DBUG_ENTER("ha_archive::create"); + + auto_increment_value= (create_info->auto_increment_value ? + create_info->auto_increment_value -1 : + (ulonglong) 0); + + if ((create_file= my_create(fn_format(name_buff,name,"",ARM, + MY_REPLACE_EXT|MY_UNPACK_FILENAME),0, + O_RDWR | O_TRUNC,MYF(MY_WME))) < 0) + { + error= my_errno; + goto error; + } + + for (uint key= 0; key < table_arg->s->keys; key++) + { + KEY *pos= table_arg->key_info+key; + KEY_PART_INFO *key_part= pos->key_part; + KEY_PART_INFO *key_part_end= key_part + pos->key_parts; + + for (; key_part != key_part_end; key_part++) + { + Field *field= key_part->field; + + if (!(field->flags & AUTO_INCREMENT_FLAG)) + { + error= -1; + goto error; + } + } + } + + write_meta_file(create_file, 0, auto_increment_value, 0, + (char *)create_info->data_file_name, + FALSE); + my_close(create_file,MYF(0)); + + /* + We reuse name_buff since it is available. 
+ */ + if (create_info->data_file_name) + { + char linkname[FN_REFLEN]; + DBUG_PRINT("info", ("archive will create stream file %s", + create_info->data_file_name)); + + fn_format(name_buff, create_info->data_file_name, "", ARZ, + MY_REPLACE_EXT|MY_UNPACK_FILENAME); + fn_format(linkname, name, "", ARZ, + MY_UNPACK_FILENAME | MY_APPEND_EXT); + if ((create_file= my_create_with_symlink(linkname, name_buff, 0, + O_RDWR | O_TRUNC,MYF(MY_WME))) < 0) + { + error= my_errno; + goto error; + } + } + else + { + if ((create_file= my_create(fn_format(name_buff, name,"", ARZ, + MY_REPLACE_EXT|MY_UNPACK_FILENAME),0, + O_RDWR | O_TRUNC,MYF(MY_WME))) < 0) + { + error= my_errno; + goto error; + } + } + if (!azdopen(&archive, create_file, O_WRONLY|O_BINARY)) + { + error= errno; + goto error2; + } + if (write_data_header(&archive)) + { + error= errno; + goto error3; + } + + if (azclose(&archive)) + { + error= errno; + goto error2; + } + + DBUG_RETURN(0); + +error3: + /* We already have an error, so ignore results of azclose. */ + (void)azclose(&archive); +error2: + my_close(create_file, MYF(0)); + delete_table(name); +error: + /* Return error number, if we got one */ + DBUG_RETURN(error ? error : -1); +} + +/* + This is where the actual row is written out. +*/ +int ha_archive::real_write_row(byte *buf, azio_stream *writer) +{ + my_off_t written; + uint *ptr, *end; + DBUG_ENTER("ha_archive::real_write_row"); + + written= azwrite(writer, buf, table->s->reclength); + DBUG_PRINT("ha_archive::real_write_row", ("Wrote %d bytes expected %d", + written, table->s->reclength)); + if (!delayed_insert || !bulk_insert) + share->dirty= TRUE; + + if (written != (my_off_t)table->s->reclength) + DBUG_RETURN(errno ? errno : -1); + /* + We should probably mark the table as damagaged if the record is written + but the blob fails. + */ + for (ptr= table->s->blob_field, end= ptr + table->s->blob_fields ; + ptr != end ; + ptr++) + { + char *data_ptr; + uint32 size= ((Field_blob*) table->field[*ptr])->get_length(); + + if (size) + { + ((Field_blob*) table->field[*ptr])->get_ptr(&data_ptr); + written= azwrite(writer, data_ptr, (unsigned)size); + if (written != (my_off_t)size) + DBUG_RETURN(errno ? errno : -1); + } + } + DBUG_RETURN(0); +} + + +/* + Look at ha_archive::open() for an explanation of the row format. + Here we just write out the row. + + Wondering about start_bulk_insert()? We don't implement it for + archive since it optimizes for lots of writes. The only save + for implementing start_bulk_insert() is that we could skip + setting dirty to true each time. +*/ +int ha_archive::write_row(byte *buf) +{ + int rc; + byte *read_buf= NULL; + ulonglong temp_auto; + DBUG_ENTER("ha_archive::write_row"); + + if (share->crashed) + DBUG_RETURN(HA_ERR_CRASHED_ON_USAGE); + + ha_statistic_increment(&SSV::ha_write_count); + if (table->timestamp_field_type & TIMESTAMP_AUTO_SET_ON_INSERT) + table->timestamp_field->set_time(); + pthread_mutex_lock(&share->mutex); + + if (table->next_number_field) + { + KEY *mkey= &table->s->key_info[0]; // We only support one key right now + update_auto_increment(); + temp_auto= table->next_number_field->val_int(); + + /* + Bad news, this will cause a search for the unique value which is very + expensive since we will have to do a table scan which will lock up + all other writers during this period. This could perhaps be optimized + in the future. 
+ */ + if (temp_auto == share->auto_increment_value && + mkey->flags & HA_NOSAME) + { + rc= HA_ERR_FOUND_DUPP_KEY; + goto error; + } + + if (temp_auto < share->auto_increment_value && + mkey->flags & HA_NOSAME) + { + /* + First we create a buffer that we can use for reading rows, and can pass + to get_row(). + */ + if (!(read_buf= (byte*) my_malloc(table->s->reclength, MYF(MY_WME)))) + { + rc= HA_ERR_OUT_OF_MEM; + goto error; + } + /* + All of the buffer must be written out or we won't see all of the + data + */ + azflush(&(share->archive_write), Z_SYNC_FLUSH); + share->forced_flushes++; + /* + Set the position of the local read thread to the beginning postion. + */ + if (read_data_header(&archive)) + { + rc= HA_ERR_CRASHED_ON_USAGE; + goto error; + } + + /* + Now we read and check all of the rows. + if (!memcmp(table->next_number_field->ptr, mfield->ptr, mfield->max_length())) + if ((longlong)temp_auto == + mfield->val_int((char*)(read_buf + mfield->offset()))) + */ + Field *mfield= table->next_number_field; + + while (!(get_row(&archive, read_buf))) + { + if (!memcmp(read_buf + mfield->offset(), table->next_number_field->ptr, + mfield->max_length())) + { + rc= HA_ERR_FOUND_DUPP_KEY; + goto error; + } + } + } + else + { + if (temp_auto > share->auto_increment_value) + auto_increment_value= share->auto_increment_value= temp_auto; + } + } + + /* + Notice that the global auto_increment has been increased. + In case of a failed row write, we will never try to reuse the value. + */ + + share->rows_recorded++; + rc= real_write_row(buf, &(share->archive_write)); +error: + pthread_mutex_unlock(&share->mutex); + if (read_buf) + my_free((gptr) read_buf, MYF(0)); + + DBUG_RETURN(rc); +} + + +ulonglong ha_archive::get_auto_increment() +{ + return share->auto_increment_value + 1; +} + +/* Initialized at each key walk (called multiple times unlike rnd_init()) */ +int ha_archive::index_init(uint keynr, bool sorted) +{ + DBUG_ENTER("ha_archive::index_init"); + active_index= keynr; + DBUG_RETURN(0); +} + + +/* + No indexes, so if we get a request for an index search since we tell + the optimizer that we have unique indexes, we scan +*/ +int ha_archive::index_read(byte *buf, const byte *key, + uint key_len, enum ha_rkey_function find_flag) +{ + int rc; + DBUG_ENTER("ha_archive::index_read"); + rc= index_read_idx(buf, active_index, key, key_len, find_flag); + DBUG_RETURN(rc); +} + + +int ha_archive::index_read_idx(byte *buf, uint index, const byte *key, + uint key_len, enum ha_rkey_function find_flag) +{ + int rc= 0; + bool found= 0; + KEY *mkey= &table->s->key_info[index]; + current_k_offset= mkey->key_part->offset; + current_key= key; + current_key_len= key_len; + + + DBUG_ENTER("ha_archive::index_read_idx"); + + /* + All of the buffer must be written out or we won't see all of the + data + */ + pthread_mutex_lock(&share->mutex); + azflush(&(share->archive_write), Z_SYNC_FLUSH); + share->forced_flushes++; + pthread_mutex_unlock(&share->mutex); + + /* + Set the position of the local read thread to the beginning postion. + */ + if (read_data_header(&archive)) + { + rc= HA_ERR_CRASHED_ON_USAGE; + goto error; + } + + while (!(get_row(&archive, buf))) + { + if (!memcmp(current_key, buf + current_k_offset, current_key_len)) + { + found= 1; + break; + } + } + + if (found) + DBUG_RETURN(0); + +error: + DBUG_RETURN(rc ? 
rc : HA_ERR_END_OF_FILE); +} + + +int ha_archive::index_next(byte * buf) +{ + bool found= 0; + + DBUG_ENTER("ha_archive::index_next"); + + while (!(get_row(&archive, buf))) + { + if (!memcmp(current_key, buf+current_k_offset, current_key_len)) + { + found= 1; + break; + } + } + + DBUG_RETURN(found ? 0 : HA_ERR_END_OF_FILE); +} + +/* + All calls that need to scan the table start with this method. If we are told + that it is a table scan we rewind the file to the beginning, otherwise + we assume the position will be set. +*/ + +int ha_archive::rnd_init(bool scan) +{ + DBUG_ENTER("ha_archive::rnd_init"); + + if (share->crashed) + DBUG_RETURN(HA_ERR_CRASHED_ON_USAGE); + + /* We rewind the file so that we can read from the beginning if scan */ + if (scan) + { + scan_rows= share->rows_recorded; + DBUG_PRINT("info", ("archive will retrieve %llu rows", scan_rows)); + records= 0; + + /* + If dirty, we lock, and then reset/flush the data. + I found that just calling azflush() doesn't always work. + */ + if (share->dirty == TRUE) + { + pthread_mutex_lock(&share->mutex); + if (share->dirty == TRUE) + { + DBUG_PRINT("info", ("archive flushing out rows for scan")); + azflush(&(share->archive_write), Z_SYNC_FLUSH); + share->forced_flushes++; + share->dirty= FALSE; + } + pthread_mutex_unlock(&share->mutex); + } + + if (read_data_header(&archive)) + DBUG_RETURN(HA_ERR_CRASHED_ON_USAGE); + } + + DBUG_RETURN(0); +} + + +/* + This is the method that is used to read a row. It assumes that the row is + positioned where you want it. +*/ +int ha_archive::get_row(azio_stream *file_to_read, byte *buf) +{ + int read; // Bytes read, azread() returns int + uint *ptr, *end; + char *last; + size_t total_blob_length= 0; + DBUG_ENTER("ha_archive::get_row"); + + read= azread(file_to_read, buf, table->s->reclength); + DBUG_PRINT("ha_archive::get_row", ("Read %d bytes expected %d", read, + table->s->reclength)); + + if (read == Z_STREAM_ERROR) + DBUG_RETURN(HA_ERR_CRASHED_ON_USAGE); + + /* If we read nothing we are at the end of the file */ + if (read == 0) + DBUG_RETURN(HA_ERR_END_OF_FILE); + + /* + If the record is the wrong size, the file is probably damaged, unless + we are dealing with a delayed insert or a bulk insert. + */ + if ((ulong) read != table->s->reclength) + DBUG_RETURN(HA_ERR_END_OF_FILE); + + /* Calculate blob length, we use this for our buffer */ + for (ptr= table->s->blob_field, end=ptr + table->s->blob_fields ; + ptr != end ; + ptr++) + { + if (ha_get_bit_in_read_set(((Field_blob*) table->field[*ptr])->fieldnr)) + total_blob_length += ((Field_blob*) table->field[*ptr])->get_length(); + } + + /* Adjust our row buffer if we need be */ + buffer.alloc(total_blob_length); + last= (char *)buffer.ptr(); + + /* Loop through our blobs and read them */ + for (ptr= table->s->blob_field, end=ptr + table->s->blob_fields ; + ptr != end ; + ptr++) + { + size_t size= ((Field_blob*) table->field[*ptr])->get_length(); + if (size) + { + if (ha_get_bit_in_read_set(((Field_blob*) table->field[*ptr])->fieldnr)) + { + read= azread(file_to_read, last, size); + if ((size_t) read != size) + DBUG_RETURN(HA_ERR_END_OF_FILE); + ((Field_blob*) table->field[*ptr])->set_ptr(size, last); + last += size; + } + else + { + (void)azseek(file_to_read, size, SEEK_CUR); + } + } + } + DBUG_RETURN(0); +} + + +/* + Called during ORDER BY. Its position is either from being called sequentially + or by having had ha_archive::rnd_pos() called before it is called. 
+*/ + +int ha_archive::rnd_next(byte *buf) +{ + int rc; + DBUG_ENTER("ha_archive::rnd_next"); + + if (share->crashed) + DBUG_RETURN(HA_ERR_CRASHED_ON_USAGE); + + if (!scan_rows) + DBUG_RETURN(HA_ERR_END_OF_FILE); + scan_rows--; + + ha_statistic_increment(&SSV::ha_read_rnd_next_count); + current_position= aztell(&archive); + rc= get_row(&archive, buf); + + + if (rc != HA_ERR_END_OF_FILE) + records++; + + DBUG_RETURN(rc); +} + + +/* + Thanks to the table flag HA_REC_NOT_IN_SEQ this will be called after + each call to ha_archive::rnd_next() if an ordering of the rows is + needed. +*/ + +void ha_archive::position(const byte *record) +{ + DBUG_ENTER("ha_archive::position"); + my_store_ptr(ref, ref_length, current_position); + DBUG_VOID_RETURN; +} + + +/* + This is called after a table scan for each row if the results of the + scan need to be ordered. It will take *pos and use it to move the + cursor in the file so that the next row that is called is the + correctly ordered row. +*/ + +int ha_archive::rnd_pos(byte * buf, byte *pos) +{ + DBUG_ENTER("ha_archive::rnd_pos"); + ha_statistic_increment(&SSV::ha_read_rnd_next_count); + current_position= (my_off_t)my_get_ptr(pos, ref_length); + (void)azseek(&archive, current_position, SEEK_SET); + + DBUG_RETURN(get_row(&archive, buf)); +} + +/* + This method repairs the meta file. It does this by walking the datafile and + rewriting the meta file. Currently it does this by calling optimize with + the extended flag. +*/ +int ha_archive::repair(THD* thd, HA_CHECK_OPT* check_opt) +{ + DBUG_ENTER("ha_archive::repair"); + check_opt->flags= T_EXTEND; + int rc= optimize(thd, check_opt); + + if (rc) + DBUG_RETURN(HA_ERR_CRASHED_ON_REPAIR); + + share->crashed= FALSE; + DBUG_RETURN(0); +} + +/* + The table can become fragmented if data was inserted, read, and then + inserted again. What we do is open up the file and recompress it completely. +*/ +int ha_archive::optimize(THD* thd, HA_CHECK_OPT* check_opt) +{ + DBUG_ENTER("ha_archive::optimize"); + int rc; + azio_stream writer; + char writer_filename[FN_REFLEN]; + + /* Flush any waiting data */ + azflush(&(share->archive_write), Z_SYNC_FLUSH); + share->forced_flushes++; + + /* Lets create a file to contain the new data */ + fn_format(writer_filename, share->table_name, "", ARN, + MY_REPLACE_EXT|MY_UNPACK_FILENAME); + + if (!(azopen(&writer, writer_filename, O_CREAT|O_WRONLY|O_TRUNC|O_BINARY))) + DBUG_RETURN(HA_ERR_CRASHED_ON_USAGE); + + /* + An extended rebuild is a lot more effort. We open up each row and re-record it. + Any dead rows are removed (aka rows that may have been partially recorded). + */ + + if (check_opt->flags == T_EXTEND) + { + DBUG_PRINT("info", ("archive extended rebuild")); + byte *buf; + + /* + First we create a buffer that we can use for reading rows, and can pass + to get_row(). + */ + if (!(buf= (byte*) my_malloc(table->s->reclength, MYF(MY_WME)))) + { + rc= HA_ERR_OUT_OF_MEM; + goto error; + } + + /* + Now we will rewind the archive file so that we are positioned at the + start of the file. + */ + rc= read_data_header(&archive); + + /* + Assuming now error from rewinding the archive file, we now write out the + new header for out data file. + */ + if (!rc) + rc= write_data_header(&writer); + + /* + On success of writing out the new header, we now fetch each row and + insert it into the new archive file. 
+ */ + if (!rc) + { + share->rows_recorded= 0; + auto_increment_value= share->auto_increment_value= 0; + while (!(rc= get_row(&archive, buf))) + { + real_write_row(buf, &writer); + if (table->found_next_number_field) + { + Field *field= table->found_next_number_field; + ulonglong auto_value= + (ulonglong) field->val_int((char*)(buf + field->offset())); + if (share->auto_increment_value < auto_value) + auto_increment_value= share->auto_increment_value= + auto_value; + } + share->rows_recorded++; + } + } + DBUG_PRINT("info", ("recovered %llu archive rows", share->rows_recorded)); + + my_free((char*)buf, MYF(0)); + if (rc && rc != HA_ERR_END_OF_FILE) + goto error; + } + else + { + DBUG_PRINT("info", ("archive quick rebuild")); + /* + The quick method is to just read the data raw, and then compress it directly. + */ + int read; // Bytes read, azread() returns int + char block[IO_SIZE]; + if (azrewind(&archive) == -1) + { + rc= HA_ERR_CRASHED_ON_USAGE; + DBUG_PRINT("info", ("archive HA_ERR_CRASHED_ON_USAGE")); + goto error; + } + + while ((read= azread(&archive, block, IO_SIZE)) > 0) + azwrite(&writer, block, read); + } + + azclose(&writer); + share->dirty= FALSE; + share->forced_flushes= 0; + azclose(&(share->archive_write)); + DBUG_PRINT("info", ("Reopening archive data file")); + if (!(azopen(&(share->archive_write), share->data_file_name, + O_WRONLY|O_APPEND|O_BINARY))) + { + DBUG_PRINT("info", ("Could not open archive write file")); + rc= HA_ERR_CRASHED_ON_USAGE; + goto error; + } + + my_rename(writer_filename,share->data_file_name,MYF(0)); + + /* + Now we need to reopen our read descriptor since it has changed. + */ + azclose(&archive); + if (!(azopen(&archive, share->data_file_name, O_RDONLY|O_BINARY))) + { + rc= HA_ERR_CRASHED_ON_USAGE; + goto error; + } + + + DBUG_RETURN(0); + +error: + azclose(&writer); + + DBUG_RETURN(rc); +} + +/* + Below is an example of how to setup row level locking. +*/ +THR_LOCK_DATA **ha_archive::store_lock(THD *thd, + THR_LOCK_DATA **to, + enum thr_lock_type lock_type) +{ + if (lock_type == TL_WRITE_DELAYED) + delayed_insert= TRUE; + else + delayed_insert= FALSE; + + if (lock_type != TL_IGNORE && lock.type == TL_UNLOCK) + { + /* + Here is where we get into the guts of a row level lock. + If TL_UNLOCK is set + If we are not doing a LOCK TABLE or DISCARD/IMPORT + TABLESPACE, then allow multiple writers + */ + + if ((lock_type >= TL_WRITE_CONCURRENT_INSERT && + lock_type <= TL_WRITE) && !thd_in_lock_tables(thd) + && !thd_tablespace_op(thd)) + lock_type = TL_WRITE_ALLOW_WRITE; + + /* + In queries of type INSERT INTO t1 SELECT ... FROM t2 ... + MySQL would use the lock TL_READ_NO_INSERT on t2, and that + would conflict with TL_WRITE_ALLOW_WRITE, blocking all inserts + to t2. Convert the lock to a normal read lock to allow + concurrent inserts to t2. 
+ */ + + if (lock_type == TL_READ_NO_INSERT && !thd_in_lock_tables(thd)) + lock_type = TL_READ; + + lock.type=lock_type; + } + + *to++= &lock; + + return to; +} + +void ha_archive::update_create_info(HA_CREATE_INFO *create_info) +{ + ha_archive::info(HA_STATUS_AUTO | HA_STATUS_CONST); + if (!(create_info->used_fields & HA_CREATE_USED_AUTO)) + { + create_info->auto_increment_value= auto_increment_value; + } + if (*share->real_path) + create_info->data_file_name= share->real_path; +} + + +/* + Hints for optimizer, see ha_tina for more information +*/ +void ha_archive::info(uint flag) +{ + DBUG_ENTER("ha_archive::info"); + /* + This should be an accurate number now, though bulk and delayed inserts can + cause the number to be inaccurate. + */ + records= share->rows_recorded; + deleted= 0; + /* Costs quite a bit more to get all information */ + if (flag & HA_STATUS_TIME) + { + MY_STAT file_stat; // Stat information for the data file + + VOID(my_stat(share->data_file_name, &file_stat, MYF(MY_WME))); + + mean_rec_length= table->s->reclength + buffer.alloced_length(); + data_file_length= file_stat.st_size; + create_time= file_stat.st_ctime; + update_time= file_stat.st_mtime; + max_data_file_length= share->rows_recorded * mean_rec_length; + } + delete_length= 0; + index_file_length=0; + + if (flag & HA_STATUS_AUTO) + auto_increment_value= share->auto_increment_value; + + DBUG_VOID_RETURN; +} + + +/* + This method tells us that a bulk insert operation is about to occur. We set + a flag which will keep write_row from saying that its data is dirty. This in + turn will keep selects from causing a sync to occur. + Basically, yet another optimizations to keep compression working well. +*/ +void ha_archive::start_bulk_insert(ha_rows rows) +{ + DBUG_ENTER("ha_archive::start_bulk_insert"); + if (!rows || rows >= ARCHIVE_MIN_ROWS_TO_USE_BULK_INSERT) + bulk_insert= TRUE; + DBUG_VOID_RETURN; +} + + +/* + Other side of start_bulk_insert, is end_bulk_insert. Here we turn off the bulk insert + flag, and set the share dirty so that the next select will call sync for us. +*/ +int ha_archive::end_bulk_insert() +{ + DBUG_ENTER("ha_archive::end_bulk_insert"); + bulk_insert= FALSE; + share->dirty= TRUE; + DBUG_RETURN(0); +} + +/* + We cancel a truncate command. The only way to delete an archive table is to drop it. + This is done for security reasons. In a later version we will enable this by + allowing the user to select a different row format. +*/ +int ha_archive::delete_all_rows() +{ + DBUG_ENTER("ha_archive::delete_all_rows"); + DBUG_RETURN(0); +} + +/* + We just return state if asked. +*/ +bool ha_archive::is_crashed() const +{ + DBUG_ENTER("ha_archive::is_crashed"); + DBUG_RETURN(share->crashed); +} + +/* + Simple scan of the tables to make sure everything is ok. +*/ + +int ha_archive::check(THD* thd, HA_CHECK_OPT* check_opt) +{ + int rc= 0; + byte *buf; + const char *old_proc_info; + ha_rows count= share->rows_recorded; + DBUG_ENTER("ha_archive::check"); + + old_proc_info= thd_proc_info(thd, "Checking table"); + /* Flush any waiting data */ + azflush(&(share->archive_write), Z_SYNC_FLUSH); + share->forced_flushes++; + + /* + First we create a buffer that we can use for reading rows, and can pass + to get_row(). + */ + if (!(buf= (byte*) my_malloc(table->s->reclength, MYF(MY_WME)))) + rc= HA_ERR_OUT_OF_MEM; + + /* + Now we will rewind the archive file so that we are positioned at the + start of the file. 
+ */ + if (!rc) + read_data_header(&archive); + + if (!rc) + while (!(rc= get_row(&archive, buf))) + count--; + + my_free((char*)buf, MYF(0)); + + thd_proc_info(thd, old_proc_info); + + if ((rc && rc != HA_ERR_END_OF_FILE) || count) + { + share->crashed= FALSE; + DBUG_RETURN(HA_ADMIN_CORRUPT); + } + else + { + DBUG_RETURN(HA_ADMIN_OK); + } +} + +/* + Check and repair the table if needed. +*/ +bool ha_archive::check_and_repair(THD *thd) +{ + HA_CHECK_OPT check_opt; + DBUG_ENTER("ha_archive::check_and_repair"); + + check_opt.init(); + + DBUG_RETURN(repair(thd, &check_opt)); +} + + +mysql_declare_plugin(archive) +{ + MYSQL_STORAGE_ENGINE_PLUGIN, + &archive_hton, + archive_hton.name, + "Brian Aker, MySQL AB", + "Archive Storage Engine", + NULL, /* Plugin Init */ + archive_db_done, /* Plugin Deinit */ + 0x0100 /* 1.0 */, +} +mysql_declare_plugin_end; diff --git a/storage/archive/ha_archive.h b/storage/archive/ha_archive.h new file mode 100644 index 00000000000..52854d0e9ac --- /dev/null +++ b/storage/archive/ha_archive.h @@ -0,0 +1,139 @@ +/* Copyright (C) 2003 MySQL AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#ifdef USE_PRAGMA_INTERFACE +#pragma interface /* gcc class implementation */ +#endif + +#include <zlib.h> +#include "azlib.h" + +/* + Please read ha_archive.cc first. If you are looking for more general + answers on how storage engines work, look at ha_example.cc and + ha_example.h. +*/ + +typedef struct st_archive_share { + char *table_name; + char data_file_name[FN_REFLEN]; + uint table_name_length,use_count; + pthread_mutex_t mutex; + THR_LOCK lock; + File meta_file; /* Meta file we use */ + azio_stream archive_write; /* Archive file we are working with */ + bool dirty; /* Flag for if a flush should occur */ + bool crashed; /* Meta file is crashed */ + ha_rows rows_recorded; /* Number of rows in tables */ + ulonglong auto_increment_value; + ulonglong forced_flushes; + ulonglong mean_rec_length; + char real_path[FN_REFLEN]; +} ARCHIVE_SHARE; + +/* + Version for file format. 
+ 1 - Initial Version +*/ +#define ARCHIVE_VERSION 2 + +class ha_archive: public handler +{ + THR_LOCK_DATA lock; /* MySQL lock */ + ARCHIVE_SHARE *share; /* Shared lock info */ + azio_stream archive; /* Archive file we are working with */ + my_off_t current_position; /* The position of the row we just read */ + byte byte_buffer[IO_SIZE]; /* Initial buffer for our string */ + String buffer; /* Buffer used for blob storage */ + ha_rows scan_rows; /* Number of rows left in scan */ + bool delayed_insert; /* If the insert is delayed */ + bool bulk_insert; /* If we are performing a bulk insert */ + const byte *current_key; + uint current_key_len; + uint current_k_offset; + +public: + ha_archive(TABLE_SHARE *table_arg); + ~ha_archive() + { + } + const char *table_type() const { return "ARCHIVE"; } + const char *index_type(uint inx) { return "NONE"; } + const char **bas_ext() const; + ulong table_flags() const + { + return (HA_REC_NOT_IN_SEQ | HA_NOT_EXACT_COUNT | HA_CAN_BIT_FIELD | + HA_FILE_BASED | HA_CAN_INSERT_DELAYED | HA_CAN_GEOMETRY); + } + ulong index_flags(uint idx, uint part, bool all_parts) const + { + return HA_ONLY_WHOLE_INDEX; + } + ulonglong get_auto_increment(); + uint max_supported_keys() const { return 1; } + uint max_supported_key_length() const { return sizeof(ulonglong); } + uint max_supported_key_part_length() const { return sizeof(ulonglong); } + int index_init(uint keynr, bool sorted); + virtual int index_read(byte * buf, const byte * key, + uint key_len, enum ha_rkey_function find_flag); + virtual int index_read_idx(byte * buf, uint index, const byte * key, + uint key_len, enum ha_rkey_function find_flag); + int index_next(byte * buf); + int open(const char *name, int mode, uint test_if_locked); + int close(void); + int write_row(byte * buf); + int real_write_row(byte *buf, azio_stream *writer); + int delete_all_rows(); + int rnd_init(bool scan=1); + int rnd_next(byte *buf); + int rnd_pos(byte * buf, byte *pos); + int get_row(azio_stream *file_to_read, byte *buf); + int read_meta_file(File meta_file, ha_rows *rows, + ulonglong *auto_increment, + ulonglong *forced_flushes, + char *real_path); + int write_meta_file(File meta_file, ha_rows rows, + ulonglong auto_increment, + ulonglong forced_flushes, + char *real_path, + bool dirty); + ARCHIVE_SHARE *get_share(const char *table_name, TABLE *table, int *rc); + int free_share(ARCHIVE_SHARE *share); + bool auto_repair() const { return 1; } // For the moment we just do this + int read_data_header(azio_stream *file_to_read); + int write_data_header(azio_stream *file_to_write); + void position(const byte *record); + void info(uint); + void update_create_info(HA_CREATE_INFO *create_info); + int create(const char *name, TABLE *form, HA_CREATE_INFO *create_info); + int optimize(THD* thd, HA_CHECK_OPT* check_opt); + int repair(THD* thd, HA_CHECK_OPT* check_opt); + void start_bulk_insert(ha_rows rows); + int end_bulk_insert(); + enum row_type get_row_type() const + { + return ROW_TYPE_COMPRESSED; + } + THR_LOCK_DATA **store_lock(THD *thd, THR_LOCK_DATA **to, + enum thr_lock_type lock_type); + bool is_crashed() const; + int check(THD* thd, HA_CHECK_OPT* check_opt); + bool check_and_repair(THD *thd); +}; + +bool archive_db_init(void); +int archive_db_end(ha_panic_function type); + |
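
Note on the meta-file format: the comment block in ha_archive.cc above describes the on-disk layout of the .ARM meta file (check byte, version byte, row count, check point, auto-increment value, forced-flush count, real path, dirty flag), and META_BUFFER_SIZE fixes its total size. The standalone sketch below is not part of this patch; it only illustrates that layout by dumping the header fields of an .ARM file. It assumes FN_REFLEN is 512 and that the ulonglong fields are stored low-byte-first, as int8store()/uint8korr() produce on common builds.

// read_arm.cc - illustrative only; not part of the MySQL/MariaDB sources.
// Dumps the .ARM meta-file header described in ha_archive.cc.
// Assumptions: FN_REFLEN == 512, little-endian field storage (int8store).
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <fstream>
#include <vector>

namespace {
const std::size_t kFnRefLen = 512;        // assumed value of FN_REFLEN
const unsigned char kCheckHeader = 254;   // ARCHIVE_CHECK_HEADER

// Decode 8 bytes stored low-byte-first, mirroring uint8korr().
uint64_t read_le64(const unsigned char *p)
{
  uint64_t v = 0;
  for (int i = 7; i >= 0; --i)
    v = (v << 8) | p[i];
  return v;
}
}  // namespace

int main(int argc, char **argv)
{
  if (argc != 2)
  {
    std::fprintf(stderr, "usage: %s <table>.ARM\n", argv[0]);
    return 1;
  }
  // check + version + rows + check point + auto increment + forced flushes
  // + real path + dirty flag  (matches META_BUFFER_SIZE in ha_archive.cc)
  const std::size_t meta_size = 1 + 1 + 4 * 8 + kFnRefLen + 1;
  std::vector<unsigned char> buf(meta_size);

  std::ifstream in(argv[1], std::ios::binary);
  if (!in.read(reinterpret_cast<char *>(&buf[0]), meta_size))
  {
    std::fprintf(stderr, "cannot read %s\n", argv[1]);
    return 1;
  }

  const unsigned char *p = &buf[0];
  if (p[0] != kCheckHeader)
  {
    std::fprintf(stderr, "bad check byte %u (file corrupted?)\n", (unsigned) p[0]);
    return 1;
  }
  std::printf("version        : %u\n", (unsigned) p[1]);
  std::printf("rows           : %llu\n", (unsigned long long) read_le64(p + 2));
  std::printf("check point    : %llu\n", (unsigned long long) read_le64(p + 10));
  std::printf("auto increment : %llu\n", (unsigned long long) read_le64(p + 18));
  std::printf("forced flushes : %llu\n", (unsigned long long) read_le64(p + 26));
  std::printf("real path      : %.*s\n", (int) kFnRefLen,
              reinterpret_cast<const char *>(p + 34));
  std::printf("dirty          : %u\n", (unsigned) p[34 + kFnRefLen]);
  return 0;
}

Compile the sketch with any C++ compiler and point it at a table's .ARM file to inspect the header fields that read_meta_file()/write_meta_file() maintain.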