summaryrefslogtreecommitdiff
path: root/sql/ha_archive.cc
diff options
context:
space:
mode:
Diffstat (limited to 'sql/ha_archive.cc')
-rw-r--r--sql/ha_archive.cc1257
1 files changed, 0 insertions, 1257 deletions
diff --git a/sql/ha_archive.cc b/sql/ha_archive.cc
deleted file mode 100644
index 0f714cc2008..00000000000
--- a/sql/ha_archive.cc
+++ /dev/null
@@ -1,1257 +0,0 @@
-/* Copyright (C) 2003 MySQL AB
-
- This program is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; version 2 of the License.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
-
-#ifdef USE_PRAGMA_IMPLEMENTATION
-#pragma implementation // gcc: Class implementation
-#endif
-
-#include "mysql_priv.h"
-
-#if defined(HAVE_ARCHIVE_DB)
-#include "ha_archive.h"
-#include <my_dir.h>
-
-/*
- First, if you want to understand storage engines you should look at
- ha_example.cc and ha_example.h.
- This example was written as a test case for a customer who needed
- a storage engine without indexes that could compress data very well.
- So, welcome to a completely compressed storage engine. This storage
- engine only does inserts. No replace, deletes, or updates. All reads are
- complete table scans. Compression is done through gzip (bzip compresses
- better, but only marginally; if someone asks I could add support for
- it too, but be aware that it costs a lot more CPU time than gzip).
-
- We keep a file pointer open for each instance of ha_archive for each read
- but for writes we keep one open file handle just for that. We flush it
- only if we have a read occur. gzip handles compressing lots of records
- at once much better than doing lots of little records between writes.
- It is possible to not lock on writes but this would then mean we couldn't
- handle bulk inserts as well (that is if someone was trying to read at
- the same time since we would want to flush).
-
- A "meta" file is kept alongside the data file. This file serves two purpose.
- The first purpose is to track the number of rows in the table. The second
- purpose is to determine if the table was closed properly or not. When the
- meta file is first opened it is marked as dirty. It is opened when the table
- itself is opened for writing. When the table is closed the new count for rows
- is written to the meta file and the file is marked as clean. If the meta file
- is opened and it is marked as dirty, it is assumed that a crash occurred. At
- this point an error occurs and the user is told to rebuild the file.
- A rebuild scans the rows and rewrites the meta file. If corruption is found
- in the data file then the meta file is not repaired.
-
- At some point a recovery method for such a drastic case needs to be devised.
-
- Locks are row level, and you will get a consistent read.
-
- For performance as far as table scans go it is quite fast. I don't have
- good numbers but locally it has out performed both Innodb and MyISAM. For
- Innodb the question will be if the table can be fit into the buffer
- pool. For MyISAM its a question of how much the file system caches the
- MyISAM file. With enough free memory MyISAM is faster. Its only when the OS
- doesn't have enough memory to cache entire table that archive turns out
- to be any faster. For writes it is always a bit slower than MyISAM. It has no
- internal limits though for row length.
-
- Examples between MyISAM (packed) and Archive.
-
- Table with 76695844 identical rows:
- 29680807 a_archive.ARZ
- 920350317 a.MYD
-
-
- Table with 8991478 rows (all of Slashdot's comments):
- 1922964506 comment_archive.ARZ
- 2944970297 comment_text.MYD
-
-
- TODO:
- Add bzip optional support.
- Allow users to set compression level.
- Add truncate table command.
- Implement versioning, should be easy.
- Allow for errors, find a way to mark bad rows.
- Talk to the gzip guys, come up with a writable format so that updates are doable
- without switching to a block method.
- Add optional feature so that rows can be flushed at interval (which will cause less
- compression but may speed up ordered searches).
- Checkpoint the meta file to allow for faster rebuilds.
- Dirty open (right now the meta file is repaired if a crash occurred).
- Option to allow for dirty reads, this would lower the sync calls, which would make
- inserts a lot faster, but would mean highly arbitrary reads.
-
- -Brian
-*/
-/*
- Notes on file formats.
- The Meta file is laid out as:
- check - Just an int of 254 to make sure that the file we are opening was
- never corrupted.
- version - The current version of the file format.
- rows - This is an unsigned long long which is the number of rows in the data
- file.
- check point - Reserved for future use
- dirty - Status of the file, whether or not its values are the latest. This
- flag is what causes a repair to occur
-
- The data file:
- check - Just an int of 254 to make sure that the file we are opening was
- never corrupted.
- version - The current version of the file format.
- data - The data is stored in a "row +blobs" format.
-*/
-
-/* TRUE once archive_db_init() has succeeded; reset by archive_db_end() */
-static bool archive_inited= FALSE;
-/*
- State shared by every ha_archive instance.
- archive_mutex is held by get_share()/free_share() while they use the
- archive_open_tables hash (keyed by table name, see archive_get_key()).
- max_zfile_size and zoffset_size are both computed in archive_db_init();
- zoffset_size becomes the handler's ref_length.
-*/
-pthread_mutex_t archive_mutex;
-static HASH archive_open_tables;
-static z_off_t max_zfile_size;
-static int zoffset_size;
-
-/* File extensions used by the engine */
-#define ARZ ".ARZ" // The data file
-#define ARN ".ARN" // Temporary file written by optimize(), renamed over ARZ
-#define ARM ".ARM" // Meta file
-/*
- Meta file layout:
- uchar + uchar + ulonglong + ulonglong + uchar (1 + 1 + 8 + 8 + 1 bytes)
-*/
-#define META_BUFFER_SIZE 19 // Size of the data used in the meta file
-/*
- Data file header layout:
- uchar + uchar
-*/
-#define DATA_BUFFER_SIZE 2 // Size of the data used in the data file
-#define ARCHIVE_CHECK_HEADER 254 // First byte of both files; used to detect corruption
-
-/*
- Minimum row estimate for which start_bulk_insert() turns on bulk mode
- (an estimate of 0 also turns it on).
-*/
-#define ARCHIVE_MIN_ROWS_TO_USE_BULK_INSERT 2
-
-
-
-/*
- Handlerton describing the ARCHIVE engine. It is handed to the handler base
- class by the ha_archive constructor. Apart from the leading identification
- fields and the archive_db_init entry (presumably the init hook - confirm
- against the handlerton declaration), every callback slot is left NULL.
-*/
-handlerton archive_hton = {
- "ARCHIVE",
- SHOW_OPTION_YES,
- "Archive storage engine",
- DB_TYPE_ARCHIVE_DB,
- archive_db_init,
- 0, /* slot */
- 0, /* savepoint size. */
- NULL, /* close_connection */
- NULL, /* savepoint */
- NULL, /* rollback to savepoint */
- NULL, /* release savepoint */
- NULL, /* commit */
- NULL, /* rollback */
- NULL, /* prepare */
- NULL, /* recover */
- NULL, /* commit_by_xid */
- NULL, /* rollback_by_xid */
- NULL, /* create_cursor_read_view */
- NULL, /* set_cursor_read_view */
- NULL, /* close_cursor_read_view */
- HTON_NO_FLAGS
-};
-
-
-/*
-  Key extractor for the archive_open_tables hash: a share is keyed by its
-  table name. The third argument is required by the hash callback signature
-  and is ignored.
-*/
-static byte* archive_get_key(ARCHIVE_SHARE *share,uint *length,
-                             my_bool not_used __attribute__((unused)))
-{
-  byte *key= (byte*) share->table_name;
-
-  *length= share->table_name_length;
-  return key;
-}
-
-
-/*
-  Set up the global state of the archive engine.
-
-  SYNOPSIS
-    archive_db_init()
-    void
-
-  DESCRIPTION
-    Creates archive_mutex and the hash of open tables, then derives
-    zoffset_size from zlibCompileFlags() and max_zfile_size from
-    sizeof(z_off_t). If either resource cannot be created the engine is
-    reported as disabled through have_archive_db.
-
-  RETURN
-    FALSE OK
-    TRUE Error
-*/
-
-bool archive_db_init()
-{
-  DBUG_ENTER("archive_db_init");
-  bool resources_ready= FALSE;
-
-  if (!pthread_mutex_init(&archive_mutex, MY_MUTEX_INIT_FAST))
-  {
-    if (!hash_init(&archive_open_tables, system_charset_info, 32, 0, 0,
-                   (hash_get_key) archive_get_key, 0, 0))
-      resources_ready= TRUE;
-    else
-      VOID(pthread_mutex_destroy(&archive_mutex)); // Undo the mutex, hash failed
-  }
-
-  if (!resources_ready)
-  {
-    have_archive_db= SHOW_OPTION_DISABLED; // If we couldn't use handler
-    DBUG_RETURN(TRUE);
-  }
-
-  zoffset_size= 2 << ((zlibCompileFlags() >> 6) & 3);
-
-  /* Largest offset that fits into a z_off_t of this build */
-  if (sizeof(z_off_t) == 2)
-    max_zfile_size= INT_MAX16;
-  else if (sizeof(z_off_t) == 8)
-    max_zfile_size= (z_off_t) LONGLONG_MAX;
-  else
-    max_zfile_size= INT_MAX32;
-
-  archive_inited= TRUE;
-  DBUG_RETURN(FALSE);
-}
-
-/*
-  Tear down the global state created by archive_db_init().
-
-  SYNOPSIS
-    archive_db_end()
-    void
-
-  DESCRIPTION
-    Does nothing when the engine was never initialised, so it is safe to
-    call more than once.
-
-  RETURN
-    FALSE OK (always)
-*/
-
-bool archive_db_end()
-{
-  if (!archive_inited)
-    return FALSE;
-
-  hash_free(&archive_open_tables);
-  VOID(pthread_mutex_destroy(&archive_mutex));
-  archive_inited= 0;
-  return FALSE;
-}
-
-/*
- Construct a handler instance bound to archive_hton. Both insert-mode
- flags start cleared. No file is opened here; that happens in open().
-*/
-ha_archive::ha_archive(TABLE *table_arg)
- :handler(&archive_hton, table_arg), delayed_insert(0), bulk_insert(0)
-{
- /* Set our original buffer from pre-allocated memory */
- buffer.set((char *)byte_buffer, IO_SIZE, system_charset_info);
-
- /*
- The size of the offset value we will use for position(). It was derived
- from zlib's compile flags in archive_db_init() and must fit in a z_off_t.
- */
- ref_length = zoffset_size;
- DBUG_ASSERT(ref_length <= sizeof(z_off_t));
-}
-
-/*
-  Rewind a data file and validate its two byte header.
-
-  SYNOPSIS
-    read_data_header()
-    file_to_read    gzip stream opened for reading
-
-  DESCRIPTION
-    On success the stream is positioned on the first row.
-
-  RETURN
-    0                        Header present and valid
-    HA_ERR_CRASHED_ON_USAGE  Rewind failed, or the check byte or the
-                             version byte does not match
-    errno or -1              The header could not be read completely
-*/
-int ha_archive::read_data_header(gzFile file_to_read)
-{
-  uchar data_buffer[DATA_BUFFER_SIZE];
-  DBUG_ENTER("ha_archive::read_data_header");
-
-  if (gzrewind(file_to_read) == -1)
-    DBUG_RETURN(HA_ERR_CRASHED_ON_USAGE);
-
-  if (gzread(file_to_read, data_buffer, DATA_BUFFER_SIZE) != DATA_BUFFER_SIZE)
-    DBUG_RETURN(errno ? errno : -1);
-
-  DBUG_PRINT("ha_archive::read_data_header", ("Check %u", data_buffer[0]));
-  DBUG_PRINT("ha_archive::read_data_header", ("Version %u", data_buffer[1]));
-
-  /*
-    A mismatch in either byte means this is not a data file we understand.
-    (Requiring both bytes to be wrong would let a damaged check byte slip
-    through whenever the version byte happened to match.)
-  */
-  if ((data_buffer[0] != (uchar)ARCHIVE_CHECK_HEADER) ||
-      (data_buffer[1] != (uchar)ARCHIVE_VERSION))
-    DBUG_RETURN(HA_ERR_CRASHED_ON_USAGE);
-
-  DBUG_RETURN(0);
-}
-
-/*
-  Write the two byte header (check byte, format version) of a data file.
-
-  SYNOPSIS
-    write_data_header()
-    file_to_write   gzip stream opened for writing
-
-  RETURN
-    0            Header written
-    errno or -1  The write was short or failed. Never 0 on failure, so
-                 callers can test the result for truth.
-*/
-int ha_archive::write_data_header(gzFile file_to_write)
-{
-  uchar data_buffer[DATA_BUFFER_SIZE];
-  DBUG_ENTER("ha_archive::write_data_header");
-
-  data_buffer[0]= (uchar)ARCHIVE_CHECK_HEADER;
-  data_buffer[1]= (uchar)ARCHIVE_VERSION;
-
-  if (gzwrite(file_to_write, data_buffer, DATA_BUFFER_SIZE) !=
-      DATA_BUFFER_SIZE)
-  {
-    /* errno may still be 0 here (e.g. a zlib level failure) */
-    DBUG_RETURN(errno ? errno : -1);
-  }
-  DBUG_PRINT("ha_archive::write_data_header", ("Check %u", (uint)data_buffer[0]));
-  DBUG_PRINT("ha_archive::write_data_header", ("Version %u", (uint)data_buffer[1]));
-
-  DBUG_RETURN(0);
-}
-
-/*
-  Load the fixed size meta file header and validate it.
-
-  The row count stored in the header is copied into *rows as soon as the
-  header has been read, i.e. also when the header is subsequently rejected.
-  The stored version is only logged, not checked.
-
-  RETURN
-    0                        Header is valid and marked clean
-    -1                       The header could not be read completely
-    HA_ERR_CRASHED_ON_USAGE  Wrong check byte, or the dirty flag is set
-*/
-int ha_archive::read_meta_file(File meta_file, ha_rows *rows)
-{
-  uchar header[META_BUFFER_SIZE];
-  ulonglong stored_check_point;
-
-  DBUG_ENTER("ha_archive::read_meta_file");
-
-  VOID(my_seek(meta_file, 0, MY_SEEK_SET, MYF(0)));
-  if (my_read(meta_file, (byte*)header, META_BUFFER_SIZE, 0) != META_BUFFER_SIZE)
-    DBUG_RETURN(-1);
-
-  /* Layout: check(1) version(1) rows(8) check point(8) dirty(1) */
-  *rows= (ha_rows)uint8korr(header + 2);
-  stored_check_point= uint8korr(header + 10);
-
-  DBUG_PRINT("ha_archive::read_meta_file", ("Check %d", (uint)header[0]));
-  DBUG_PRINT("ha_archive::read_meta_file", ("Version %d", (uint)header[1]));
-  DBUG_PRINT("ha_archive::read_meta_file", ("Rows %lu", (ulong) *rows));
-  DBUG_PRINT("ha_archive::read_meta_file", ("Checkpoint %lu", (ulong) stored_check_point));
-  DBUG_PRINT("ha_archive::read_meta_file", ("Dirty %d", (int)header[18]));
-
-  if (header[0] != (uchar)ARCHIVE_CHECK_HEADER)
-    DBUG_RETURN(HA_ERR_CRASHED_ON_USAGE);
-  if ((bool)header[18] == TRUE)
-    DBUG_RETURN(HA_ERR_CRASHED_ON_USAGE);
-
-  my_sync(meta_file, MYF(MY_WME));
-
-  DBUG_RETURN(0);
-}
-
-/*
-  Rewrite the complete meta file header at offset 0 and sync it.
-
-  `dirty` records whether the header may be out of step with the data file:
-  init_archive_writer() writes it as TRUE before the writer is opened, and
-  free_share() writes it as FALSE on a clean close.
-
-  RETURN
-    0   Header written
-    -1  The write was short or failed
-*/
-int ha_archive::write_meta_file(File meta_file, ha_rows rows, bool dirty)
-{
-  uchar header[META_BUFFER_SIZE];
-  ulonglong reserved_check_point= 0; //Reserved for the future
-
-  DBUG_ENTER("ha_archive::write_meta_file");
-
-  DBUG_PRINT("ha_archive::write_meta_file", ("Check %d", (uint)ARCHIVE_CHECK_HEADER));
-  DBUG_PRINT("ha_archive::write_meta_file", ("Version %d", (uint)ARCHIVE_VERSION));
-  DBUG_PRINT("ha_archive::write_meta_file", ("Rows %lu", (ulong)rows));
-  DBUG_PRINT("ha_archive::write_meta_file", ("Checkpoint %lu", (ulong) reserved_check_point));
-  DBUG_PRINT("ha_archive::write_meta_file", ("Dirty %d", (uint)dirty));
-
-  /* Layout: check(1) version(1) rows(8) check point(8) dirty(1) */
-  header[0]= (uchar)ARCHIVE_CHECK_HEADER;
-  header[1]= (uchar)ARCHIVE_VERSION;
-  int8store(header + 2, (ulonglong)rows);
-  int8store(header + 10, reserved_check_point);
-  header[18]= (uchar)dirty;
-
-  VOID(my_seek(meta_file, 0, MY_SEEK_SET, MYF(0)));
-  if (my_write(meta_file, (byte *)header, META_BUFFER_SIZE, 0) != META_BUFFER_SIZE)
-    DBUG_RETURN(-1);
-
-  my_sync(meta_file, MYF(MY_WME));
-
-  DBUG_RETURN(0);
-}
-
-
-/*
-  Find or create the share for a table and take a reference on it.
-
-  SYNOPSIS
-    get_share()
-    table_name   Key of the share; also the base name of the table files
-    table        Not used here
-    rc           out: HA_ERR_OUT_OF_MEM or HA_ERR_CRASHED_ON_USAGE.
-                 Left untouched on plain success, so the caller must
-                 initialise it.
-
-  DESCRIPTION
-    A share is returned even when the table is crashed, so that a repair
-    can still be run on it. A share is flagged as crashed when its meta
-    file cannot be opened or its header is rejected by read_meta_file().
-    The whole function runs under archive_mutex.
-
-    See ha_example.cc for a longer description.
-
-  RETURN
-    The share with use_count incremented, or NULL when out of memory.
-*/
-ARCHIVE_SHARE *ha_archive::get_share(const char *table_name,
-                                     TABLE *table, int *rc)
-{
-  ARCHIVE_SHARE *share;
-  char meta_file_name[FN_REFLEN];
-  uint length;
-  char *tmp_name;
-  DBUG_ENTER("ha_archive::get_share");
-
-  pthread_mutex_lock(&archive_mutex);
-  length=(uint) strlen(table_name);
-
-  if (!(share=(ARCHIVE_SHARE*) hash_search(&archive_open_tables,
-                                           (byte*) table_name,
-                                           length)))
-  {
-    /* The share and its copy of the name live in one allocation */
-    if (!my_multi_malloc(MYF(MY_WME | MY_ZEROFILL),
-                         &share, sizeof(*share),
-                         &tmp_name, length+1,
-                         NullS))
-    {
-      pthread_mutex_unlock(&archive_mutex);
-      *rc= HA_ERR_OUT_OF_MEM;
-      DBUG_RETURN(NULL);
-    }
-
-    share->use_count= 0;
-    share->table_name_length= length;
-    share->table_name= tmp_name;
-    share->crashed= FALSE;
-    share->archive_write_open= FALSE;
-    fn_format(share->data_file_name,table_name,"",ARZ,MY_REPLACE_EXT|MY_UNPACK_FILENAME);
-    fn_format(meta_file_name,table_name,"",ARM,MY_REPLACE_EXT|MY_UNPACK_FILENAME);
-    strmov(share->table_name,table_name);
-    /*
-      We will use this lock for rows.
-    */
-    VOID(pthread_mutex_init(&share->mutex,MY_MUTEX_INIT_FAST));
-
-    /*
-      If the meta file will not open, or its header is bad or dirty, we
-      assume the table is crashed and leave it up to the user to fix.
-      Only read the header when there is a valid descriptor to read from.
-      (The file is not marked dirty here; init_archive_writer() does that
-      when the first write happens.)
-    */
-    if ((share->meta_file= my_open(meta_file_name, O_RDWR, MYF(0))) == -1)
-      share->crashed= TRUE;
-    else if (read_meta_file(share->meta_file, &share->rows_recorded))
-      share->crashed= TRUE;
-
-    VOID(my_hash_insert(&archive_open_tables, (byte*) share));
-    thr_lock_init(&share->lock);
-  }
-  share->use_count++;
-  DBUG_PRINT("info", ("archive table %.*s has %d open handles now",
-                      share->table_name_length, share->table_name,
-                      share->use_count));
-  if (share->crashed)
-    *rc= HA_ERR_CRASHED_ON_USAGE;
-  pthread_mutex_unlock(&archive_mutex);
-
-  DBUG_RETURN(share);
-}
-
-
-/*
-  Drop one reference to a share and destroy it when it was the last one.
-
-  DESCRIPTION
-    On the last release the meta file is rewritten with the current row
-    count (dirty when the share is crashed, clean otherwise), the shared
-    writer is closed if it was opened, and the share is freed. A share
-    whose meta file never opened (descriptor < 0) has nothing to write or
-    close.
-
-    See ha_example.cc for a description.
-
-  RETURN
-    0  ok
-    1  Closing the writer or the meta file failed
-*/
-int ha_archive::free_share(ARCHIVE_SHARE *share)
-{
-  int rc= 0;
-  DBUG_ENTER("ha_archive::free_share");
-
-  pthread_mutex_lock(&archive_mutex);
-  DBUG_PRINT("info", ("archive table %.*s has %d open handles on entrance",
-                      share->table_name_length, share->table_name,
-                      share->use_count));
-  if (!--share->use_count)
-  {
-    hash_delete(&archive_open_tables, (byte*) share);
-    thr_lock_delete(&share->lock);
-    VOID(pthread_mutex_destroy(&share->mutex));
-
-    if (share->meta_file >= 0)
-      (void)write_meta_file(share->meta_file, share->rows_recorded,
-                            share->crashed ? TRUE : FALSE);
-
-    /* Any result other than Z_OK means buffered rows may be lost */
-    if (share->archive_write_open)
-      if (gzclose(share->archive_write) != Z_OK)
-        rc= 1;
-
-    if (share->meta_file >= 0 && my_close(share->meta_file, MYF(0)))
-      rc= 1;
-    my_free((gptr) share, MYF(0));
-  }
-  pthread_mutex_unlock(&archive_mutex);
-
-  DBUG_RETURN(rc);
-}
-
-/*
-  Open the writer that all handlers of this table share.
-
-  The meta file is flagged dirty first (the result of that write is
-  ignored). The data file is then opened for appending; on failure the
-  share is marked as crashed.
-
-  RETURN
-    0  Writer is open
-    1  The data file could not be opened for appending
-*/
-int ha_archive::init_archive_writer()
-{
-  DBUG_ENTER("ha_archive::init_archive_writer");
-  (void)write_meta_file(share->meta_file, share->rows_recorded, TRUE);
-
-  /*
-    Opening and closing the data file is expensive, and a gzip file cannot
-    be open for reading and writing at once, so a single append-only
-    writer is kept open and shared among all open tables.
-  */
-  share->archive_write= gzopen(share->data_file_name, "ab");
-  if (share->archive_write != NULL)
-  {
-    share->archive_write_open= TRUE;
-    /* Seed the size estimate used by real_write_row() */
-    info(HA_STATUS_TIME);
-    share->approx_file_size= (ulong) data_file_length;
-    DBUG_RETURN(0);
-  }
-
-  share->crashed= TRUE;
-  DBUG_RETURN(1);
-}
-
-
-/*
- File extensions that belong to an archive table: the data file and the
- meta file. The list is NullS terminated. The temporary ARN file used by
- optimize() is deliberately not part of it.
-*/
-static const char *ha_archive_exts[] = {
- ARZ,
- ARM,
- NullS
-};
-
-/* Returns the NullS terminated extension list above. */
-const char **ha_archive::bas_ext() const
-{
- return ha_archive_exts;
-}
-
-
-/*
-  Open a table.
-
-  DESCRIPTION
-    When opening a file we:
-      Create/get our shared structure.
-      Init our lock.
-      Open the data file we will read from.
-
-    A crashed table is only opened when HA_OPEN_FOR_REPAIR is given.
-    Every failure after get_share() gives the share reference back, so a
-    failed open does not keep the share alive.
-
-  RETURN
-    0                        ok (also for a crashed table opened for repair)
-    HA_ERR_OUT_OF_MEM        No share could be allocated
-    HA_ERR_CRASHED_ON_USAGE  Table is crashed, or the data file won't open
-    EROFS / EACCES           The data file is not accessible
-*/
-int ha_archive::open(const char *name, int mode, uint open_options)
-{
-  int rc= 0;
-  DBUG_ENTER("ha_archive::open");
-
-  DBUG_PRINT("info", ("archive table was opened for crash %s",
-                      (open_options & HA_OPEN_FOR_REPAIR) ? "yes" : "no"));
-  share= get_share(name, table, &rc);
-
-  /* No share at all: nothing to release */
-  if (!share)
-    DBUG_RETURN(rc ? rc : HA_ERR_OUT_OF_MEM);
-
-  if (rc == HA_ERR_CRASHED_ON_USAGE && !(open_options & HA_OPEN_FOR_REPAIR))
-  {
-    free_share(share);
-    DBUG_RETURN(rc);
-  }
-
-  thr_lock_data_init(&share->lock,&lock,NULL);
-
-  if ((archive= gzopen(share->data_file_name, "rb")) == NULL)
-  {
-    /* Save errno first, releasing the share may overwrite it */
-    int open_errno= errno;
-    free_share(share);
-    if (open_errno == EROFS || open_errno == EACCES)
-      DBUG_RETURN(my_errno= open_errno);
-    DBUG_RETURN(HA_ERR_CRASHED_ON_USAGE);
-  }
-
-  DBUG_PRINT("info", ("archive table was crashed %s",
-                      rc == HA_ERR_CRASHED_ON_USAGE ? "yes" : "no"));
-  if (rc == HA_ERR_CRASHED_ON_USAGE && open_options & HA_OPEN_FOR_REPAIR)
-  {
-    DBUG_RETURN(0);
-  }
-  else
-    DBUG_RETURN(rc);
-}
-
-
-/*
-  Closes the file.
-
-  SYNOPSIS
-    close();
-
-  IMPLEMENTATION:
-
-  The read stream owned by this handler is closed first. After that our
-  reference to the share is dropped, which may free the share as well.
-  Both steps are always attempted.
-
-  RETURN
-    0 ok
-    1 Error
-*/
-
-int ha_archive::close(void)
-{
-  DBUG_ENTER("ha_archive::close");
-
-  /* The read stream goes first ... */
-  int rc= (gzclose(archive) == Z_ERRNO) ? 1 : 0;
-
-  /* ... then the share, whatever the stream result was */
-  rc|= free_share(share);
-
-  DBUG_RETURN(rc);
-}
-
-
-/*
-  Create the meta file and the data file of a new table.
-
-  DESCRIPTION
-    The format is pretty simple; you can read about both files above.
-    Unlike other storage engines we do not "pack" our data. Since we
-    are about to do a general compression, packing would just be a waste of
-    CPU time. If the table has blobs they are written after the row in the
-    order of creation.
-
-    The meta file is created first (0 rows, clean), then the data file with
-    just its header. When any step after the creation of the meta file
-    fails, whatever was created is removed again through delete_table().
-
-  RETURN
-    0                Both files created
-    error number/-1  Failure; -1 when no error number is available
-*/
-
-int ha_archive::create(const char *name, TABLE *table_arg,
-                       HA_CREATE_INFO *create_info)
-{
-  File create_file; // We use to create the datafile and the metafile
-  int gz_fd;        // Duplicate of create_file that zlib takes over
-  char name_buff[FN_REFLEN];
-  int error;
-  DBUG_ENTER("ha_archive::create");
-
-  if ((create_file= my_create(fn_format(name_buff,name,"",ARM,
-                                        MY_REPLACE_EXT|MY_UNPACK_FILENAME),0,
-                              O_RDWR | O_TRUNC,MYF(MY_WME))) < 0)
-  {
-    error= my_errno;
-    goto error;
-  }
-  if (write_meta_file(create_file, 0, FALSE))
-  {
-    error= my_errno;
-    goto error2;
-  }
-  if (my_close(create_file,MYF(0)))
-  {
-    error= my_errno;
-    goto error1;
-  }
-
-  /*
-    We reuse name_buff since it is available.
-  */
-  if ((create_file= my_create(fn_format(name_buff,name,"",ARZ,
-                                        MY_REPLACE_EXT|MY_UNPACK_FILENAME),0,
-                              O_RDWR | O_TRUNC,MYF(MY_WME))) < 0)
-  {
-    /* Do not leave a meta file without a data file behind */
-    error= my_errno;
-    goto error1;
-  }
-
-  /*
-    zlib closes the descriptor it is given, so it gets a duplicate and
-    create_file stays ours to close.
-  */
-  if ((gz_fd= dup(create_file)) < 0)
-  {
-    error= errno;
-    goto error2;
-  }
-  if ((archive= gzdopen(gz_fd, "wb")) == NULL)
-  {
-    error= errno;
-    close(gz_fd); // Not taken over by zlib, so it is still ours
-    goto error2;
-  }
-  if (write_data_header(archive))
-  {
-    error= errno;
-    goto error3;
-  }
-
-  if (gzclose(archive))
-  {
-    error= errno;
-    goto error2;
-  }
-
-  my_close(create_file, MYF(0));
-
-  DBUG_RETURN(0);
-
-error3:
-  /* We already have an error, so ignore results of gzclose. */
-  (void)gzclose(archive);
-error2:
-  my_close(create_file, MYF(0));
-error1:
-  delete_table(name);
-error:
-  /* Return error number, if we got one */
-  DBUG_RETURN(error ? error : -1);
-}
-
-/*
- This is where the actual row is written out.
-
- The fixed size record is written to `writer` first, followed by the data
- of every non-empty blob in blob_field order.
-
- RETURN
- 0 Row written
- HA_ERR_RECORD_FILE_FULL The row would take the file past max_zfile_size
- errno or -1 A gzwrite() was short or failed
-*/
-int ha_archive::real_write_row(byte *buf, gzFile writer)
-{
- z_off_t written, total_row_length;
- uint *ptr, *end;
- DBUG_ENTER("ha_archive::real_write_row");
- /* Uncompressed size of the row: record plus all of its blobs */
- total_row_length= table->s->reclength;
- for (ptr= table->s->blob_field, end= ptr + table->s->blob_fields;
- ptr != end; ptr++)
- total_row_length+= ((Field_blob*) table->field[*ptr])->get_length();
- /*
- approx_file_size is only an estimate. Before refusing the row, refresh
- it from the size of the file on disk and test again.
- */
- if (share->approx_file_size > max_zfile_size - total_row_length)
- {
- info(HA_STATUS_TIME);
- share->approx_file_size= (ulong) data_file_length;
- if (share->approx_file_size > max_zfile_size - total_row_length)
- DBUG_RETURN(HA_ERR_RECORD_FILE_FULL);
- }
- share->approx_file_size+= total_row_length;
- written= gzwrite(writer, buf, table->s->reclength);
- DBUG_PRINT("ha_archive::real_write_row", ("Wrote %d bytes expected %lu", (int) written,
- table->s->reclength));
- /*
- As written, dirty is left alone only when BOTH delayed_insert and
- bulk_insert are set.
- NOTE(review): the comment on start_bulk_insert() suggests a bulk insert
- alone should be enough to skip this - confirm whether || is intended.
- */
- if (!delayed_insert || !bulk_insert)
- share->dirty= TRUE;
-
- if (written != (z_off_t)table->s->reclength)
- DBUG_RETURN(errno ? errno : -1);
- /*
- We should probably mark the table as damaged if the record is written
- but the blob fails.
- */
- for (ptr= table->s->blob_field, end= ptr + table->s->blob_fields ;
- ptr != end ;
- ptr++)
- {
- char *data_ptr;
- uint32 size= ((Field_blob*) table->field[*ptr])->get_length();
-
- /* Empty blobs take no space in the file */
- if (size)
- {
- ((Field_blob*) table->field[*ptr])->get_ptr(&data_ptr);
- written= gzwrite(writer, data_ptr, (unsigned)size);
- if (written != (z_off_t)size)
- DBUG_RETURN(errno ? errno : -1);
- }
- }
- DBUG_RETURN(0);
-}
-
-
-/*
-  Append one row to the table.
-
-  DESCRIPTION
-    Look at the notes on file formats above for an explanation of the row
-    format. The shared writer is opened on the first write. The write
-    itself is done by real_write_row() while share->mutex is held; the
-    mutex is released on every way out of this function.
-
-  RETURN
-    0                        Row written
-    HA_ERR_CRASHED_ON_USAGE  Table is crashed or the writer won't open
-    other                    Whatever real_write_row() returned
-*/
-int ha_archive::write_row(byte *buf)
-{
-  int rc;
-  DBUG_ENTER("ha_archive::write_row");
-
-  if (share->crashed)
-    DBUG_RETURN(HA_ERR_CRASHED_ON_USAGE);
-
-  statistic_increment(table->in_use->status_var.ha_write_count, &LOCK_status);
-  if (table->timestamp_field_type & TIMESTAMP_AUTO_SET_ON_INSERT)
-    table->timestamp_field->set_time();
-
-  pthread_mutex_lock(&share->mutex);
-  if (!share->archive_write_open && init_archive_writer())
-  {
-    /* Do not leave with the mutex held, the next writer would hang */
-    pthread_mutex_unlock(&share->mutex);
-    DBUG_RETURN(HA_ERR_CRASHED_ON_USAGE);
-  }
-
-  /*
-    Varchar structures are constant in size but are not cleaned up request
-    to request. Clearing the unused tail would improve compression, but
-    that step is not enabled; for now this loop only traces the lengths
-    involved.
-  */
-  for (Field **field=table->field ; *field ; field++)
-  {
-    DBUG_PRINT("archive",("Pack is %d\n", (*field)->pack_length()));
-    DBUG_PRINT("archive",("MyPack is %d\n", (*field)->data_length((char*) buf + (*field)->offset())));
-    if ((*field)->real_type() == MYSQL_TYPE_VARCHAR)
-    {
-      uint actual_length= (*field)->data_length((char*) buf + (*field)->offset());
-      uint offset= (*field)->offset() + actual_length +
-                   (actual_length > 255 ? 2 : 1);
-      DBUG_PRINT("archive",("Offset is %d -> %d\n", actual_length, offset));
-    }
-  }
-
-  share->rows_recorded++;
-  rc= real_write_row(buf, share->archive_write);
-  pthread_mutex_unlock(&share->mutex);
-
-  DBUG_RETURN(rc);
-}
-
-/*
-  Prepare for reading rows.
-
-  For a full scan (scan == TRUE) the number of rows to return is taken from
-  the share, pending writes are flushed if the share is dirty, and the read
-  stream is rewound to just behind the data file header. Otherwise nothing
-  is done; the position is expected to be set through rnd_pos().
-
-  RETURN
-    0                        ok
-    HA_ERR_CRASHED_ON_USAGE  Table is crashed or the header is unreadable
-*/
-
-int ha_archive::rnd_init(bool scan)
-{
-  DBUG_ENTER("ha_archive::rnd_init");
-
-  if (share->crashed)
-    DBUG_RETURN(HA_ERR_CRASHED_ON_USAGE);
-
-  /* Only a scan needs the file rewound */
-  if (!scan)
-    DBUG_RETURN(0);
-
-  scan_rows= share->rows_recorded;
-  DBUG_PRINT("info", ("archive will retrieve %lu rows", (ulong) scan_rows));
-  records= 0;
-
-  /*
-    The dirty flag is tested once without the mutex and again with it, so
-    the lock is only taken when there may be something to flush.
-    I found that just calling gzflush() doesn't always work.
-  */
-  if (share->dirty == TRUE)
-  {
-    pthread_mutex_lock(&share->mutex);
-    if (share->dirty == TRUE)
-    {
-      DBUG_PRINT("info", ("archive flushing out rows for scan"));
-      gzflush(share->archive_write, Z_SYNC_FLUSH);
-      share->dirty= FALSE;
-    }
-    pthread_mutex_unlock(&share->mutex);
-  }
-
-  DBUG_RETURN(read_data_header(archive) ? HA_ERR_CRASHED_ON_USAGE : 0);
-}
-
-
-/*
-  Read the row at the current position of a stream.
-
-  DESCRIPTION
-    The fixed size record is read into buf. The blob lengths stored in the
-    record then tell how many bytes of blob data follow; those are read
-    into the handler's row buffer and the blob fields are pointed at it.
-
-  RETURN
-    0                        Row read
-    HA_ERR_END_OF_FILE       Nothing left, or the row is incomplete
-    HA_ERR_CRASHED_ON_USAGE  gzread() reported Z_STREAM_ERROR
-    HA_ERR_OUT_OF_MEM        The blob buffer could not be enlarged
-*/
-int ha_archive::get_row(gzFile file_to_read, byte *buf)
-{
-  int read; // Bytes read, gzread() returns int
-  uint *ptr, *end;
-  char *last;
-  size_t total_blob_length= 0;
-  DBUG_ENTER("ha_archive::get_row");
-
-  read= gzread(file_to_read, buf, table->s->reclength);
-  DBUG_PRINT("ha_archive::get_row", ("Read %d bytes expected %lu", (int) read,
-                                     table->s->reclength));
-
-  /*
-    NOTE(review): any other negative result from gzread() falls through to
-    the size test below and is reported as end of file. optimize() appears
-    to rely on that to drop a damaged tail - confirm before tightening.
-  */
-  if (read == Z_STREAM_ERROR)
-    DBUG_RETURN(HA_ERR_CRASHED_ON_USAGE);
-
-  /* If we read nothing we are at the end of the file */
-  if (read == 0)
-    DBUG_RETURN(HA_ERR_END_OF_FILE);
-
-  /*
-    If the record is the wrong size, the file is probably damaged, unless
-    we are dealing with a delayed insert or a bulk insert.
-  */
-  if ((ulong) read != table->s->reclength)
-    DBUG_RETURN(HA_ERR_END_OF_FILE);
-
-  /* Calculate blob length, we use this for our buffer */
-  for (ptr= table->s->blob_field, end=ptr + table->s->blob_fields ;
-       ptr != end ;
-       ptr++)
-    total_blob_length += ((Field_blob*) table->field[*ptr])->get_length();
-
-  /*
-    Adjust our row buffer if need be. Reading the blobs into a buffer that
-    failed to grow would write past its end, so give up instead.
-  */
-  if (buffer.alloc(total_blob_length))
-    DBUG_RETURN(HA_ERR_OUT_OF_MEM);
-  last= (char *)buffer.ptr();
-
-  /* Loop through our blobs and read them */
-  for (ptr= table->s->blob_field, end=ptr + table->s->blob_fields ;
-       ptr != end ;
-       ptr++)
-  {
-    size_t size= ((Field_blob*) table->field[*ptr])->get_length();
-    if (size)
-    {
-      read= gzread(file_to_read, last, size);
-      if ((size_t) read != size)
-        DBUG_RETURN(HA_ERR_END_OF_FILE);
-      ((Field_blob*) table->field[*ptr])->set_ptr(size, last);
-      last += size;
-    }
-  }
-  DBUG_RETURN(0);
-}
-
-
-/*
-  Return the next row of a scan.
-
-  At most as many rows are returned as the share had recorded when
-  rnd_init() was called. The offset of the row is remembered before it is
-  read so that position() can hand it out afterwards.
-*/
-
-int ha_archive::rnd_next(byte *buf)
-{
-  DBUG_ENTER("ha_archive::rnd_next");
-
-  if (share->crashed)
-    DBUG_RETURN(HA_ERR_CRASHED_ON_USAGE);
-
-  /* All rows that were promised by rnd_init() have been returned */
-  if (scan_rows == 0)
-    DBUG_RETURN(HA_ERR_END_OF_FILE);
-  --scan_rows;
-
-  statistic_increment(table->in_use->status_var.ha_read_rnd_next_count,
-                      &LOCK_status);
-  current_position= gztell(archive);
-
-  const int rc= get_row(archive, buf);
-  if (rc != HA_ERR_END_OF_FILE)
-    records++;
-
-  DBUG_RETURN(rc);
-}
-
-
-/*
- Thanks to the table flag HA_REC_NOT_IN_SEQ this will be called after
- each call to ha_archive::rnd_next() if an ordering of the rows is
- needed.
-
- Stores current_position (the stream offset rnd_next()/rnd_pos() recorded
- for the row last read) into ref, using ref_length bytes. The record
- argument itself is not looked at.
-*/
-
-void ha_archive::position(const byte *record)
-{
- DBUG_ENTER("ha_archive::position");
- my_store_ptr(ref, ref_length, current_position);
- DBUG_VOID_RETURN;
-}
-
-
-/*
-  Read the row stored at a position handed out by position().
-
-  DESCRIPTION
-    This is called after a table scan for each row if the results of the
-    scan need to be ordered. *pos is decoded into a stream offset, the read
-    stream is moved there and the row at that offset is read.
-
-  RETURN
-    HA_ERR_CRASHED_ON_USAGE  The stream could not be moved to the offset
-    other                    Whatever get_row() returned
-*/
-
-int ha_archive::rnd_pos(byte * buf, byte *pos)
-{
-  DBUG_ENTER("ha_archive::rnd_pos");
-  statistic_increment(table->in_use->status_var.ha_read_rnd_next_count,
-                      &LOCK_status);
-  current_position= (z_off_t)my_get_ptr(pos, ref_length);
-
-  /* Reading on after a failed seek would return some other row's bytes */
-  if (gzseek(archive, current_position, SEEK_SET) == (z_off_t) -1)
-    DBUG_RETURN(HA_ERR_CRASHED_ON_USAGE);
-
-  DBUG_RETURN(get_row(archive, buf));
-}
-
-/*
-  Repair a table by rebuilding it row by row.
-
-  The work is done by optimize() in its extended mode, which is forced by
-  overwriting check_opt->flags with T_EXTEND. The crashed flag of the share
-  is only cleared when the rebuild succeeded.
-
-  RETURN
-    0                         ok
-    HA_ERR_CRASHED_ON_REPAIR  The rebuild failed
-*/
-int ha_archive::repair(THD* thd, HA_CHECK_OPT* check_opt)
-{
-  DBUG_ENTER("ha_archive::repair");
-  check_opt->flags= T_EXTEND;
-
-  if (optimize(thd, check_opt))
-    DBUG_RETURN(HA_ERR_CRASHED_ON_REPAIR);
-
-  share->crashed= FALSE;
-  DBUG_RETURN(0);
-}
-
-/*
-  Rewrite the data file into one freshly compressed file.
-
-  DESCRIPTION
-    The table can become fragmented if data was inserted, read, and then
-    inserted again. The contents are copied into a temporary ARN file which
-    is then renamed over the data file; the new stream becomes the shared
-    writer and the read stream of this handler is reopened.
-
-    With check_opt->flags == T_EXTEND every row is read and written again
-    and the row count of the share is replaced by the number of rows
-    copied; a trailing incomplete row is dropped. Otherwise the
-    uncompressed contents are copied block by block.
-
-    Until the rename has succeeded the old data file and the old writer are
-    left untouched, so a failed run does not damage the table.
-
-    NOTE(review): share->mutex is not taken here - confirm that the callers
-    keep concurrent writers away.
-
-  RETURN
-    0                        ok
-    HA_ERR_CRASHED_ON_USAGE  A file would not open, or the raw copy failed
-    HA_ERR_OUT_OF_MEM        No row buffer
-    other                    Error from reading or writing a row, or from
-                             the rename
-*/
-int ha_archive::optimize(THD* thd, HA_CHECK_OPT* check_opt)
-{
-  DBUG_ENTER("ha_archive::optimize");
-  int rc= 0;
-  gzFile writer;
-  char writer_filename[FN_REFLEN];
-
-  /* Open up the writer if we haven't yet; without it there is no table */
-  if (!share->archive_write_open && init_archive_writer())
-    DBUG_RETURN(HA_ERR_CRASHED_ON_USAGE);
-
-  /* Flush any waiting data */
-  gzflush(share->archive_write, Z_SYNC_FLUSH);
-
-  /* Lets create a file to contain the new data */
-  fn_format(writer_filename, share->table_name, "", ARN,
-            MY_REPLACE_EXT|MY_UNPACK_FILENAME);
-
-  if ((writer= gzopen(writer_filename, "wb")) == NULL)
-    DBUG_RETURN(HA_ERR_CRASHED_ON_USAGE);
-
-  /*
-    An extended rebuild is a lot more effort. We open up each row and re-record it.
-    Any dead rows are removed (aka rows that may have been partially recorded).
-  */
-
-  if (check_opt->flags == T_EXTEND)
-  {
-    byte *buf;
-    ha_rows rows_copied= 0;
-
-    /*
-      First we create a buffer that we can use for reading rows, and can pass
-      to get_row().
-    */
-    if (!(buf= (byte*) my_malloc(table->s->reclength, MYF(MY_WME))))
-    {
-      rc= HA_ERR_OUT_OF_MEM;
-      goto error;
-    }
-
-    /*
-      Now we will rewind the archive file so that we are positioned at the
-      start of the file.
-    */
-    rc= read_data_header(archive);
-
-    /*
-      Assuming no error from rewinding the archive file, we now write out the
-      new header for our data file.
-    */
-    if (!rc)
-      rc= write_data_header(writer);
-
-    /*
-      On success of writing out the new header, we now fetch each row and
-      insert it into the new archive file. A row that cannot be written
-      ends the rebuild; counting it would make the row count lie.
-    */
-    if (!rc)
-    {
-      while (!(rc= get_row(archive, buf)))
-      {
-        if ((rc= real_write_row(buf, writer)))
-          break;
-        rows_copied++;
-      }
-    }
-    DBUG_PRINT("info", ("recovered %lu archive rows",
-                        (ulong) rows_copied));
-
-    my_free((char*)buf, MYF(0));
-    if (rc && rc != HA_ERR_END_OF_FILE)
-      goto error;
-
-    /* End of file is the normal way out of the loop above */
-    rc= 0;
-    share->rows_recorded= rows_copied;
-  }
-  else
-  {
-    /*
-      The quick method is to just read the data raw, and then compress it directly.
-    */
-    int read; // Bytes read, gzread() returns int
-    char block[IO_SIZE];
-    if (gzrewind(archive) == -1)
-    {
-      rc= HA_ERR_CRASHED_ON_USAGE;
-      goto error;
-    }
-
-    while ((read= gzread(archive, block, IO_SIZE)) > 0)
-    {
-      if (gzwrite(writer, block, read) != read)
-      {
-        rc= HA_ERR_CRASHED_ON_USAGE;
-        goto error;
-      }
-    }
-    /* A negative count is an error and must never be used as a length */
-    if (read < 0)
-    {
-      rc= HA_ERR_CRASHED_ON_USAGE;
-      goto error;
-    }
-  }
-
-  gzflush(writer, Z_SYNC_FLUSH);
-
-  /* Put the new file in place before letting go of the old writer */
-  if (my_rename(writer_filename,share->data_file_name,MYF(0)))
-  {
-    rc= my_errno ? my_errno : -1;
-    goto error;
-  }
-
-  share->dirty= FALSE;
-  gzclose(share->archive_write);
-  share->archive_write= writer;
-
-  /*
-    Now we need to reopen our read descriptor since it has changed.
-    From here on writer belongs to the share and must not be closed by us.
-  */
-  gzclose(archive);
-  if ((archive= gzopen(share->data_file_name, "rb")) == NULL)
-    DBUG_RETURN(HA_ERR_CRASHED_ON_USAGE);
-
-  DBUG_RETURN(0);
-
-error:
-  gzclose(writer);
-
-  DBUG_RETURN(rc);
-}
-
-/*
-  Tell the server which lock this handler wants for a statement.
-
-  The requested type may be relaxed so that writers do not block each other
-  and readers do not block writers. As a side effect delayed_insert is set
-  to whether the request is a TL_WRITE_DELAYED lock.
-*/
-THR_LOCK_DATA **ha_archive::store_lock(THD *thd,
-                                       THR_LOCK_DATA **to,
-                                       enum thr_lock_type lock_type)
-{
-  delayed_insert= (lock_type == TL_WRITE_DELAYED) ? TRUE : FALSE;
-
-  if (lock_type != TL_IGNORE && lock.type == TL_UNLOCK)
-  {
-    /* Both relaxations below are off while LOCK TABLES is in effect */
-    if (!thd->in_lock_tables)
-    {
-      if (lock_type >= TL_WRITE_CONCURRENT_INSERT &&
-          lock_type <= TL_WRITE &&
-          !thd->tablespace_op)
-      {
-        /*
-          Not a DISCARD/IMPORT TABLESPACE either, so allow multiple
-          writers.
-        */
-        lock_type = TL_WRITE_ALLOW_WRITE;
-      }
-      else if (lock_type == TL_READ_NO_INSERT)
-      {
-        /*
-          In queries of type INSERT INTO t1 SELECT ... FROM t2 ...
-          MySQL would use the lock TL_READ_NO_INSERT on t2, and that
-          would conflict with TL_WRITE_ALLOW_WRITE, blocking all inserts
-          to t2. Convert the lock to a normal read lock to allow
-          concurrent inserts to t2.
-        */
-        lock_type = TL_READ;
-      }
-    }
-
-    lock.type=lock_type;
-  }
-
-  *to= &lock;
-
-  return to + 1;
-}
-
-
-/*
-  Fill in the statistics the optimizer and SHOW TABLE STATUS ask for.
-
-  DESCRIPTION
-    The row count always comes from the share. With HA_STATUS_TIME the
-    data file is stat'ed as well to get its size and times; when the stat
-    fails those three values keep whatever they held before instead of
-    being set from an uninitialised structure.
-
-    See ha_tina for more information on the hints.
-
-  RETURN
-    0 (always)
-*/
-int ha_archive::info(uint flag)
-{
-  DBUG_ENTER("ha_archive::info");
-  /*
-    This should be an accurate number now, though bulk and delayed inserts can
-    cause the number to be inaccurate.
-  */
-  records= share->rows_recorded;
-  deleted= 0;
-  /* Costs quite a bit more to get all information */
-  if (flag & HA_STATUS_TIME)
-  {
-    MY_STAT file_stat; // Stat information for the data file
-
-    if (my_stat(share->data_file_name, &file_stat, MYF(MY_WME)))
-    {
-      data_file_length= file_stat.st_size;
-      create_time= file_stat.st_ctime;
-      update_time= file_stat.st_mtime;
-    }
-
-    mean_rec_length= table->s->reclength + buffer.alloced_length();
-    max_data_file_length= share->rows_recorded * mean_rec_length;
-  }
-  delete_length= 0;
-  index_file_length=0;
-
-  DBUG_RETURN(0);
-}
-
-
-/*
-  Called before a multi-row insert. The bulk_insert flag is raised when the
-  row estimate is unknown (0) or reaches
-  ARCHIVE_MIN_ROWS_TO_USE_BULK_INSERT; real_write_row() looks at the flag
-  when it decides whether to mark the share dirty. The point is to avoid
-  needless syncs, which hurt compression.
-*/
-void ha_archive::start_bulk_insert(ha_rows rows)
-{
-  DBUG_ENTER("ha_archive::start_bulk_insert");
-  const bool estimate_unknown= !rows;
-
-  if (estimate_unknown || rows >= ARCHIVE_MIN_ROWS_TO_USE_BULK_INSERT)
-    bulk_insert= TRUE;
-  DBUG_VOID_RETURN;
-}
-
-
-/*
- Other side of start_bulk_insert(). The bulk insert flag is turned off and
- the share is marked dirty, so that the next scan started through
- rnd_init() flushes the writer before it reads.
-
- RETURN
- 0 (always)
-*/
-int ha_archive::end_bulk_insert()
-{
- DBUG_ENTER("ha_archive::end_bulk_insert");
- bulk_insert= FALSE;
- share->dirty= TRUE;
- DBUG_RETURN(0);
-}
-
-/*
- We refuse a truncate command. The only way to delete an archive table is to
- drop it. This is done for security reasons. In a later version we will
- enable this by allowing the user to select a different row format.
-
- RETURN
- HA_ERR_WRONG_COMMAND (always)
-*/
-int ha_archive::delete_all_rows()
-{
- DBUG_ENTER("ha_archive::delete_all_rows");
- DBUG_RETURN(HA_ERR_WRONG_COMMAND);
-}
-
-/*
- We just return state if asked: the crashed flag of the share, which is
- set by get_share() and init_archive_writer() and cleared by repair().
-*/
-bool ha_archive::is_crashed() const
-{
- DBUG_ENTER("ha_archive::is_crashed");
- DBUG_RETURN(share->crashed);
-}
-
-/*
-  Scan the whole table and compare what is found with the meta data.
-
-  DESCRIPTION
-    Pending writes are flushed, the data file header is validated and all
-    rows are read and counted. The table is reported as corrupt when the
-    header is bad, a row cannot be read, or the number of rows differs from
-    the count the share had when the check started.
-
-    The share is flagged as crashed only for a hard read error. A mere
-    count mismatch is reported but leaves the flag alone, and an existing
-    crashed flag is never cleared here; that is the job of repair().
-
-  RETURN
-    HA_ADMIN_OK       Table is fine
-    HA_ADMIN_CORRUPT  See above; also returned when out of memory
-*/
-
-int ha_archive::check(THD* thd, HA_CHECK_OPT* check_opt)
-{
-  int rc= 0;
-  byte *buf;
-  const char *old_proc_info=thd->proc_info;
-  ha_rows expected_rows= share->rows_recorded;
-  ha_rows rows_found= 0;
-  DBUG_ENTER("ha_archive::check");
-
-  thd->proc_info= "Checking table";
-  /* Flush any waiting data; there is none if the writer was never opened */
-  if (share->archive_write_open)
-    gzflush(share->archive_write, Z_SYNC_FLUSH);
-
-  /*
-    First we create a buffer that we can use for reading rows, and can pass
-    to get_row().
-  */
-  if (!(buf= (byte*) my_malloc(table->s->reclength, MYF(MY_WME))))
-    rc= HA_ERR_OUT_OF_MEM;
-
-  /*
-    Now we will rewind the archive file so that we are positioned at the
-    start of the file. A bad header is a failed check.
-  */
-  if (!rc)
-    rc= read_data_header(archive);
-
-  if (!rc)
-    while (!(rc= get_row(archive, buf)))
-      rows_found++;
-
-  if (buf)
-    my_free((char*)buf, MYF(0));
-
-  thd->proc_info= old_proc_info;
-
-  if (rc && rc != HA_ERR_END_OF_FILE)
-  {
-    /* Running out of memory says nothing about the state of the table */
-    if (rc != HA_ERR_OUT_OF_MEM)
-      share->crashed= TRUE;
-    DBUG_RETURN(HA_ADMIN_CORRUPT);
-  }
-
-  if (rows_found != expected_rows)
-    DBUG_RETURN(HA_ADMIN_CORRUPT);
-
-  DBUG_RETURN(HA_ADMIN_OK);
-}
-
-/*
-  Repair the table with default check options. Despite the name no separate
-  check is run first; repair() is called right away.
-
-  RETURN
-    FALSE  Repair succeeded
-    TRUE   Repair failed
-*/
-bool ha_archive::check_and_repair(THD *thd)
-{
-  HA_CHECK_OPT default_opt;
-  DBUG_ENTER("ha_archive::check_and_repair");
-
-  default_opt.init();
-
-  const int repair_rc= repair(thd, &default_opt);
-  DBUG_RETURN(repair_rc);
-}
-#endif /* HAVE_ARCHIVE_DB */