author     unknown <tsmith@sita.local>    2007-08-29 15:28:38 -0600
committer  unknown <tsmith@sita.local>    2007-08-29 15:28:38 -0600
commit     48193af48948acb84ddd3b488b6f8faa4a6d126b (patch)
tree       81c00dc60322deeb0c094deef5d141509c980a60 /sql/log_event_old.cc
parent     024bd2f612825cd36c93775728f56856e5f28267 (diff)
parent     053c9d1c473df4e1457992ee6a39da3e0cb39bdd (diff)
Merge sita.local:/Users/tsmith/m/bk/maint/51-target22
into sita.local:/Users/tsmith/m/bk/maint/51
sql/field.cc:
Auto merged
sql/log_event_old.cc:
Auto merged
sql/rpl_record.h:
Auto merged
sql/rpl_utility.cc:
Auto merged
sql/rpl_utility.h:
Auto merged
sql/slave.h:
Auto merged
storage/innobase/handler/ha_innodb.cc:
Auto merged
sql/log_event.cc:
Manual merge
sql/log_event.h:
Manual merge
sql/log_event_old.h:
Manual merge
sql/rpl_record.cc:
Manual merge
sql/slave.cc:
Manual merge
Diffstat (limited to 'sql/log_event_old.cc')
-rw-r--r--  sql/log_event_old.cc | 1115
1 file changed, 1115 insertions(+), 0 deletions(-)
diff --git a/sql/log_event_old.cc b/sql/log_event_old.cc
index 35e358001fd..420df67dc54 100644
--- a/sql/log_event_old.cc
+++ b/sql/log_event_old.cc
@@ -1,9 +1,967 @@
 #include "mysql_priv.h"
+#ifndef MYSQL_CLIENT
+#include "rpl_rli.h"
+#include "rpl_utility.h"
+#endif
 #include "log_event_old.h"
 #include "rpl_record_old.h"

 #if !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION)
+
+// Old implementation of do_apply_event()
+int
+Old_rows_log_event::do_apply_event(Rows_log_event *ev, const RELAY_LOG_INFO *rli)
+{
+  DBUG_ENTER("Rows_log_event::do_apply_event(st_relay_log_info*)");
+  int error= 0;
+  THD *thd= ev->thd;
+  uchar const *row_start= ev->m_rows_buf;
+
+  /*
+    If m_table_id == ~0UL, then we have a dummy event that does not
+    contain any data.  In that case, we just remove all tables in the
+    tables_to_lock list, close the thread tables, and return with
+    success.
+  */
+  if (ev->m_table_id == ~0UL)
+  {
+    /*
+      This one is supposed to be set: just an extra check so that
+      nothing strange has happened.
+    */
+    DBUG_ASSERT(ev->get_flags(Rows_log_event::STMT_END_F));
+
+    const_cast<RELAY_LOG_INFO*>(rli)->clear_tables_to_lock();
+    close_thread_tables(thd);
+    thd->clear_error();
+    DBUG_RETURN(0);
+  }
+
+  /*
+    'thd' has been set by exec_relay_log_event(), just before calling
+    do_apply_event().  We still check here to prevent future coding
+    errors.
+  */
+  DBUG_ASSERT(rli->sql_thd == thd);
+
+  /*
+    If no locks have been taken, this is the first binrow event seen
+    after the table map events.  We should then lock all the tables
+    used in the transaction and proceed with execution of the actual
+    event.
+  */
+  if (!thd->lock)
+  {
+    bool need_reopen= 1; /* To execute the first lap of the loop below */
+
+    /*
+      lock_tables() reads the contents of thd->lex, so it must be
+      initialized.  Contrary to Table_map_log_event::do_apply_event(),
+      we do not call mysql_init_query() here, since that may reset the
+      binlog format.
+    */
+    lex_start(thd);
+
+    while ((error= lock_tables(thd, rli->tables_to_lock,
+                               rli->tables_to_lock_count, &need_reopen)))
+    {
+      if (!need_reopen)
+      {
+        if (thd->query_error || thd->is_fatal_error)
+        {
+          /*
+            Error reporting borrowed from Query_log_event with many excessive
+            simplifications (we don't honour --slave-skip-errors)
+          */
+          uint actual_error= thd->net.last_errno;
+          rli->report(ERROR_LEVEL, actual_error,
+                      "Error '%s' in %s event: when locking tables",
+                      (actual_error ? thd->net.last_error :
+                       "unexpected success or fatal error"),
+                      ev->get_type_str());
+          thd->is_fatal_error= 1;
+        }
+        else
+        {
+          rli->report(ERROR_LEVEL, error,
+                      "Error in %s event: when locking tables",
+                      ev->get_type_str());
+        }
+        const_cast<RELAY_LOG_INFO*>(rli)->clear_tables_to_lock();
+        DBUG_RETURN(error);
+      }
+
+      /*
+        So we need to reopen the tables.
+
+        We need to flush the pending RBR event, since it keeps a
+        pointer to an open table.
+
+        ALTERNATIVE SOLUTION (not implemented): Extract a pointer to
+        the pending RBR event and reset the table pointer after the
+        tables have been reopened.
+
+        NOTE: For this new scheme there should be no pending event;
+        we need to add code to assert that is the case.
+      */
+      thd->binlog_flush_pending_rows_event(false);
+      TABLE_LIST *tables= rli->tables_to_lock;
+      close_tables_for_reopen(thd, &tables);
+
+      uint tables_count= rli->tables_to_lock_count;
+      if ((error= open_tables(thd, &tables, &tables_count, 0)))
+      {
+        if (thd->query_error || thd->is_fatal_error)
+        {
+          /*
+            Error reporting borrowed from Query_log_event with many excessive
+            simplifications (we don't honour --slave-skip-errors)
+          */
+          uint actual_error= thd->net.last_errno;
+          rli->report(ERROR_LEVEL, actual_error,
+                      "Error '%s' on reopening tables",
+                      (actual_error ? thd->net.last_error :
+                       "unexpected success or fatal error"));
+          thd->query_error= 1;
+        }
+        const_cast<RELAY_LOG_INFO*>(rli)->clear_tables_to_lock();
+        DBUG_RETURN(error);
+      }
+    }
+
+    /*
+      When the open and locking succeeded, we check all tables to
+      ensure that they still have the correct type.
+
+      We can use a down cast here since we know that every table added
+      to the tables_to_lock list is a RPL_TABLE_LIST.
+    */
+
+    {
+      RPL_TABLE_LIST *ptr= rli->tables_to_lock;
+      for ( ; ptr ; ptr= static_cast<RPL_TABLE_LIST*>(ptr->next_global))
+      {
+        if (ptr->m_tabledef.compatible_with(rli, ptr->table))
+        {
+          mysql_unlock_tables(thd, thd->lock);
+          thd->lock= 0;
+          thd->query_error= 1;
+          const_cast<RELAY_LOG_INFO*>(rli)->clear_tables_to_lock();
+          DBUG_RETURN(Rows_log_event::ERR_BAD_TABLE_DEF);
+        }
+      }
+    }
+
+    /*
+      ... and then we add all the tables to the table map and remove
+      them from the tables-to-lock list.
+
+      We also invalidate the query cache for all the tables, since
+      they will now be changed.
+
+      TODO [/Matz]: Maybe the query cache should not be invalidated
+      here?  It might be that a table is not changed, even though it
+      was locked for the statement.  We do know that each
+      Rows_log_event contains at least one row, so after processing one
+      Rows_log_event, we can invalidate the query cache for the
+      associated table.
+    */
+    for (TABLE_LIST *ptr= rli->tables_to_lock ; ptr ; ptr= ptr->next_global)
+    {
+      const_cast<RELAY_LOG_INFO*>(rli)->m_table_map.set_table(ptr->table_id, ptr->table);
+    }
+#ifdef HAVE_QUERY_CACHE
+    query_cache.invalidate_locked_for_write(rli->tables_to_lock);
+#endif
+  }
+
+  TABLE* table= const_cast<RELAY_LOG_INFO*>(rli)->m_table_map.get_table(ev->m_table_id);
+
+  if (table)
+  {
+    /*
+      table == NULL means that this table should not be replicated
+      (this was set up by Table_map_log_event::do_apply_event()
+      which tested replicate-* rules).
+    */
+
+    /*
+      It's not needed to set_time() but
+      1) it continues the property that "Time" in SHOW PROCESSLIST shows how
+         much the slave is behind
+      2) it will be needed when we allow replication from a table with no
+         TIMESTAMP column to a table with one.
+      So we call set_time(), like in SBR.  Presently it changes nothing.
+    */
+    thd->set_time((time_t)ev->when);
+    /*
+      There are a few flags that are replicated with each row event.
+      Make sure to set/clear them before executing the main body of
+      the event.
+    */
+    if (ev->get_flags(Rows_log_event::NO_FOREIGN_KEY_CHECKS_F))
+      thd->options|= OPTION_NO_FOREIGN_KEY_CHECKS;
+    else
+      thd->options&= ~OPTION_NO_FOREIGN_KEY_CHECKS;
+
+    if (ev->get_flags(Rows_log_event::RELAXED_UNIQUE_CHECKS_F))
+      thd->options|= OPTION_RELAXED_UNIQUE_CHECKS;
+    else
+      thd->options&= ~OPTION_RELAXED_UNIQUE_CHECKS;
+    /* A small test to verify that objects have consistent types */
+    DBUG_ASSERT(sizeof(thd->options) == sizeof(OPTION_RELAXED_UNIQUE_CHECKS));
+
+    /*
+      Now we are in a statement and will stay in a statement until we
+      see a STMT_END_F.
+
+      We set this flag here, before actually applying any rows, in
+      case the SQL thread is stopped and we need to detect that we're
+      inside a statement, since halting abruptly might cause problems
+      when restarting.
+    */
+    const_cast<RELAY_LOG_INFO*>(rli)->set_flag(RELAY_LOG_INFO::IN_STMT);
+
+    error= do_before_row_operations(table);
+    while (error == 0 && row_start < ev->m_rows_end)
+    {
+      uchar const *row_end= NULL;
+      if ((error= do_prepare_row(thd, rli, table, row_start, &row_end)))
+        break; // We should perform the after-row operation even in
+               // the case of error
+
+      DBUG_ASSERT(row_end != NULL); // cannot happen
+      DBUG_ASSERT(row_end <= ev->m_rows_end);
+
+      /* in_use can have been set to NULL in close_tables_for_reopen */
+      THD* old_thd= table->in_use;
+      if (!table->in_use)
+        table->in_use= thd;
+      error= do_exec_row(table);
+      table->in_use = old_thd;
+      switch (error)
+      {
+        /* Some recoverable errors */
+      case HA_ERR_RECORD_CHANGED:
+      case HA_ERR_KEY_NOT_FOUND:  /* Idempotency support: OK if
+                                     tuple does not exist */
+        error= 0;
+        /* Fall through */
+      case 0:
+        break;
+
+      default:
+        rli->report(ERROR_LEVEL, thd->net.last_errno,
+                    "Error in %s event: row application failed. %s",
+                    ev->get_type_str(),
+                    thd->net.last_error ? thd->net.last_error : "");
+        thd->query_error= 1;
+        break;
+      }
+
+      row_start= row_end;
+    }
+    DBUG_EXECUTE_IF("STOP_SLAVE_after_first_Rows_event",
+                    const_cast<RELAY_LOG_INFO*>(rli)->abort_slave= 1;);
+    error= do_after_row_operations(table, error);
+    if (!ev->cache_stmt)
+    {
+      DBUG_PRINT("info", ("Marked that we need to keep log"));
+      thd->options|= OPTION_KEEP_LOG;
+    }
+  }
+
+  /*
+    We need to delay this clear until the table def is no longer needed.
+    The table def is needed in unpack_row().
+  */
+  if (rli->tables_to_lock && ev->get_flags(Rows_log_event::STMT_END_F))
+    const_cast<RELAY_LOG_INFO*>(rli)->clear_tables_to_lock();
+
+  if (error)
+  {                     /* error has occurred during the transaction */
+    rli->report(ERROR_LEVEL, thd->net.last_errno,
+                "Error in %s event: error during transaction execution "
+                "on table %s.%s. %s",
+                ev->get_type_str(), table->s->db.str,
+                table->s->table_name.str,
+                thd->net.last_error ? thd->net.last_error : "");
+
+    /*
+      If one day we honour --skip-slave-errors in row-based replication, and
+      the error should be skipped, then we would clear mappings, rollback,
+      and close tables, but the slave SQL thread would not stop and then may
+      assume the mapping is still available and the tables still open...
+      So then we should clear mappings/rollback/close here only if this is a
+      STMT_END_F.
+      For now we code knowing that the error is not skippable, so the slave
+      SQL thread is certainly going to stop; the rollback happens at the
+      caller, along with SBR.
+    */
+    thd->reset_current_stmt_binlog_row_based();
+    const_cast<RELAY_LOG_INFO*>(rli)->cleanup_context(thd, error);
+    thd->query_error= 1;
+    DBUG_RETURN(error);
+  }
+
+  /*
+    This code would ideally be placed in do_update_pos() instead, but
+    since we have no access to the table there, we do the setting of
+    last_event_start_time here instead.
+  */
+  if (table && (table->s->primary_key == MAX_KEY) &&
+      !ev->cache_stmt &&
+      ev->get_flags(Rows_log_event::STMT_END_F) == Rows_log_event::RLE_NO_FLAGS)
+  {
+    /*
+      ------------ Temporary fix until WL#2975 is implemented ---------
+
+      This event is not the last one (no STMT_END_F).  If we stop now
+      (in case of terminate_slave_thread()), how will we restart?
+      We have to restart from the Table_map_log_event, but as this
+      table is not transactional, the rows already inserted will still
+      be present, and idempotency is not guaranteed (no PK), so we risk
+      that repeating leads to a double insert.  So we desperately try
+      to continue, hoping we'll eventually leave this buggy situation
+      (by executing the final Rows_log_event).  If we are in a hopeless
+      wait (we have reached the end of the last relay log and nothing
+      gets appended there), we timeout after one minute and notify the
+      DBA about the problem.  When WL#2975 is implemented, just remove
+      the member st_relay_log_info::last_event_start_time and all its
+      occurrences.
+    */
+    const_cast<RELAY_LOG_INFO*>(rli)->last_event_start_time= my_time(0);
+  }
+
+  DBUG_RETURN(0);
+}
+#endif
+
+#if !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION)
+
+/*
+  Check if there are more UNIQUE keys after the given key.
+*/
+static int
+last_uniq_key(TABLE *table, uint keyno)
+{
+  while (++keyno < table->s->keys)
+    if (table->key_info[keyno].flags & HA_NOSAME)
+      return 0;
+  return 1;
+}
+
+/*
+  Compare table->record[0] and table->record[1].
+
+  Returns TRUE if different.
+*/
+static bool record_compare(TABLE *table)
+{
+  /*
+    Need to set the X bit and the filler bits in both records, since
+    there are engines that do not set them correctly.
+
+    In addition, since MyISAM checks that one hasn't tampered with the
+    record, it is necessary to restore the old bytes into the record
+    after doing the comparison.
+
+    TODO[record format ndb]: Remove this once NDB returns correct
+    records.  Check that the other engines also return correct records.
+  */
+
+  bool result= FALSE;
+  uchar saved_x[2], saved_filler[2];
+
+  if (table->s->null_bytes > 0)
+  {
+    for (int i = 0 ; i < 2 ; ++i)
+    {
+      saved_x[i]= table->record[i][0];
+      saved_filler[i]= table->record[i][table->s->null_bytes - 1];
+      table->record[i][0]|= 1U;
+      table->record[i][table->s->null_bytes - 1]|=
+        256U - (1U << table->s->last_null_bit_pos);
+    }
+  }
+
+  if (table->s->blob_fields + table->s->varchar_fields == 0)
+  {
+    result= cmp_record(table,record[1]);
+    goto record_compare_exit;
+  }
+
+  /* Compare null bits */
+  if (memcmp(table->null_flags,
+             table->null_flags+table->s->rec_buff_length,
+             table->s->null_bytes))
+  {
+    result= TRUE;                               // Diff in NULL value
+    goto record_compare_exit;
+  }
+
+  /* Compare updated fields */
+  for (Field **ptr=table->field ; *ptr ; ptr++)
+  {
+    if ((*ptr)->cmp_binary_offset(table->s->rec_buff_length))
+    {
+      result= TRUE;
+      goto record_compare_exit;
+    }
+  }
+
+record_compare_exit:
+  /*
+    Restore the saved bytes.
+
+    TODO[record format ndb]: Remove this code once NDB returns the
+    correct record format.
+  */
+  if (table->s->null_bytes > 0)
+  {
+    for (int i = 0 ; i < 2 ; ++i)
+    {
+      table->record[i][0]= saved_x[i];
+      table->record[i][table->s->null_bytes - 1]= saved_filler[i];
+    }
+  }
+
+  return result;
+}
+
+/*
+  Copy "extra" columns from record[1] to record[0].
+
+  Copy the extra fields that are not present on the master but are
+  present on the slave from record[1] to record[0].  This is used
+  after fetching a record that is to be updated, either inside
+  replace_record() or as part of executing an update_row().
+ */
+static int
+copy_extra_record_fields(TABLE *table,
+                         size_t master_reclength,
+                         my_ptrdiff_t master_fields)
+{
+  DBUG_ENTER("copy_extra_record_fields(table, master_reclen, master_fields)");
+  DBUG_PRINT("info", ("Copying to 0x%lx "
+                      "from field %lu at offset %lu "
+                      "to field %d at offset %lu",
+                      (long) table->record[0],
+                      (ulong) master_fields, (ulong) master_reclength,
+                      table->s->fields, table->s->reclength));
+  /*
+    Copy the extra fields of the slave that do not exist on the
+    master into record[0] (they basically hold the default values).
+  */
+
+  if (table->s->fields < (uint) master_fields)
+    DBUG_RETURN(0);
+
+  DBUG_ASSERT(master_reclength <= table->s->reclength);
+  if (master_reclength < table->s->reclength)
+    bmove_align(table->record[0] + master_reclength,
+                table->record[1] + master_reclength,
+                table->s->reclength - master_reclength);
+
+  /*
+    Bit columns are special.  We iterate over all the remaining
+    columns and copy the "extra" bits to the new record.  This is
+    not a very good solution: it should be refactored when the
+    opportunity arises.
+
+    REFACTORING SUGGESTION (Matz).  Introduce a member function
+    similar to move_field_offset() called copy_field_offset() to
+    copy field values, and implement it for all Field subclasses.  Use
+    this function to copy data from the found record to the record
+    that is going to be inserted.
+
+    The copy_field_offset() function needs to be a virtual function,
+    which in this case will prevent copying an entire range of
+    fields efficiently.
+  */
+  {
+    Field **field_ptr= table->field + master_fields;
+    for ( ; *field_ptr ; ++field_ptr)
+    {
+      /*
+        Set the null bit according to the values in record[1]
+      */
+      if ((*field_ptr)->maybe_null() &&
+          (*field_ptr)->is_null_in_record(reinterpret_cast<uchar*>(table->record[1])))
+        (*field_ptr)->set_null();
+      else
+        (*field_ptr)->set_notnull();
+
+      /*
+        Do the extra work for special columns.
+      */
+      switch ((*field_ptr)->real_type())
+      {
+      default:
+        /* Nothing to do */
+        break;
+
+      case MYSQL_TYPE_BIT:
+        Field_bit *f= static_cast<Field_bit*>(*field_ptr);
+        if (f->bit_len > 0)
+        {
+          my_ptrdiff_t const offset= table->record[1] - table->record[0];
+          uchar const bits=
+            get_rec_bits(f->bit_ptr + offset, f->bit_ofs, f->bit_len);
+          set_rec_bits(bits, f->bit_ptr, f->bit_ofs, f->bit_len);
+        }
+        break;
+      }
+    }
+  }
+  DBUG_RETURN(0);                               // All OK
+}
+
+/*
+  Replace the provided record in the database.
+
+  SYNOPSIS
+    replace_record()
+    thd    Thread context for writing the record.
+    table  Table to which the record should be written.
+    master_reclength
+           Offset to the first column that is not present on the
+           master; alternatively, the length of the record on the
+           master side.
+    master_fields
+           Number of columns in the record on the master side.
+
+  RETURN VALUE
+    Error code on failure, 0 on success.
+
+  DESCRIPTION
+    Similar to how it is done in mysql_insert(), we first try to do
+    a ha_write_row(), and if that fails due to duplicated keys (or
+    indices), we do an ha_update_row() or a ha_delete_row() instead.
+ */
+static int
+replace_record(THD *thd, TABLE *table,
+               ulong const master_reclength,
+               uint const master_fields)
+{
+  DBUG_ENTER("replace_record");
+  DBUG_ASSERT(table != NULL && thd != NULL);
+
+  int error;
+  int keynum;
+  auto_afree_ptr<char> key(NULL);
+
+#ifndef DBUG_OFF
+  DBUG_DUMP("record[0]", table->record[0], table->s->reclength);
+  DBUG_PRINT_BITSET("debug", "write_set = %s", table->write_set);
+  DBUG_PRINT_BITSET("debug", "read_set = %s", table->read_set);
+#endif
+
+  while ((error= table->file->ha_write_row(table->record[0])))
+  {
+    if (error == HA_ERR_LOCK_DEADLOCK || error == HA_ERR_LOCK_WAIT_TIMEOUT)
+    {
+      table->file->print_error(error, MYF(0)); /* to check at exec_relay_log_event */
+      DBUG_RETURN(error);
+    }
+    if ((keynum= table->file->get_dup_key(error)) < 0)
+    {
+      table->file->print_error(error, MYF(0));
+      /*
+        We failed to retrieve the duplicate key:
+        - either because the error was not a "duplicate key" error,
+        - or because the information about which key it was is not
+          available.
+      */
+      DBUG_RETURN(error);
+    }
+
+    /*
+      We need to retrieve the old row into record[1] to be able to
+      either update or delete the offending record.  We either:
+
+      - use rnd_pos() with a row-id (available as dup_ref) to the
+        offending row, if that is possible (MyISAM and Blackhole), or else
+
+      - use index_read_idx() with the key that is duplicated, to
+        retrieve the offending row.
+    */
+    if (table->file->ha_table_flags() & HA_DUPLICATE_POS)
+    {
+      error= table->file->rnd_pos(table->record[1], table->file->dup_ref);
+      if (error)
+      {
+        table->file->print_error(error, MYF(0));
+        DBUG_RETURN(error);
+      }
+    }
+    else
+    {
+      if (table->file->extra(HA_EXTRA_FLUSH_CACHE))
+      {
+        DBUG_RETURN(my_errno);
+      }
+
+      if (key.get() == NULL)
+      {
+        key.assign(static_cast<char*>(my_alloca(table->s->max_unique_length)));
+        if (key.get() == NULL)
+          DBUG_RETURN(ENOMEM);
+      }
+
+      key_copy((uchar*)key.get(), table->record[0], table->key_info + keynum,
+               0);
+      error= table->file->index_read_idx_map(table->record[1], keynum,
+                                             (const uchar*)key.get(),
+                                             HA_WHOLE_KEY,
+                                             HA_READ_KEY_EXACT);
+      if (error)
+      {
+        table->file->print_error(error, MYF(0));
+        DBUG_RETURN(error);
+      }
+    }
+
+    /*
+      Now, table->record[1] should contain the offending row.  That
+      will enable us to update it or, alternatively, delete it (so
+      that we can insert the new row afterwards).
+
+      First we copy the columns into table->record[0] that are not
+      present on the master from table->record[1], if there are any.
+    */
+    copy_extra_record_fields(table, master_reclength, master_fields);
+
+    /*
+      REPLACE is defined as either INSERT or DELETE + INSERT.  If
+      possible, we can replace it with an UPDATE, but that will not
+      work on InnoDB if FOREIGN KEY checks are necessary.
+
+      I (Matz) am not sure of the reason for the last_uniq_key()
+      check, but I'm guessing that it's something along the
+      following lines.
+
+      Suppose that we got the duplicate key to be a key that is not
+      the last unique key for the table and we perform an update:
+      then there might be another key for which the unique check will
+      fail, so we're better off just deleting the row and inserting
+      the correct row.
+    */
+    if (last_uniq_key(table, keynum) &&
+        !table->file->referenced_by_foreign_key())
+    {
+      error= table->file->ha_update_row(table->record[1],
+                                        table->record[0]);
+      if (error && error != HA_ERR_RECORD_IS_THE_SAME)
+        table->file->print_error(error, MYF(0));
+      else
+        error= 0;
+      DBUG_RETURN(error);
+    }
+    else
+    {
+      if ((error= table->file->ha_delete_row(table->record[1])))
+      {
+        table->file->print_error(error, MYF(0));
+        DBUG_RETURN(error);
+      }
+      /* Will retry ha_write_row() with the offending row removed. */
+    }
+  }
+
+  DBUG_RETURN(error);
+}
+
+/**
+  Find the row given by 'key', if the table has keys, or else use a table scan
+  to find (and fetch) the row.
+
+  If the engine allows random access of the records, a combination of
+  position() and rnd_pos() will be used.
+
+  @param table  Pointer to the table to search
+  @param key    Pointer to the key to use for the search, if the table has a key
+
+  @pre <code>table->record[0]</code> shall contain the row to locate
+  and <code>key</code> shall contain a key to use for searching, if
+  the engine has a key.
+
+  @post If the return value is zero, <code>table->record[1]</code>
+  will contain the fetched row and the internal "cursor" will refer to
+  the row.  If the return value is non-zero,
+  <code>table->record[1]</code> is undefined.  In either case,
+  <code>table->record[0]</code> is undefined.
+
+  @return Zero if the row was successfully fetched into
+  <code>table->record[1]</code>, error code otherwise.
+ */
+
+static int find_and_fetch_row(TABLE *table, uchar *key)
+{
+  DBUG_ENTER("find_and_fetch_row(TABLE *table, uchar *key, uchar *record)");
+  DBUG_PRINT("enter", ("table: 0x%lx, key: 0x%lx  record: 0x%lx",
+                       (long) table, (long) key, (long) table->record[1]));
+
+  DBUG_ASSERT(table->in_use != NULL);
+
+  DBUG_DUMP("record[0]", table->record[0], table->s->reclength);
+
+  if ((table->file->ha_table_flags() & HA_PRIMARY_KEY_REQUIRED_FOR_POSITION) &&
+      table->s->primary_key < MAX_KEY)
+  {
+    /*
+      Use a more efficient method to fetch the record given by
+      table->record[0] if the engine allows it.  We first compute a
+      row reference using the position() member function (it will be
+      stored in table->file->ref) and then use rnd_pos() to position
+      the "cursor" (i.e., record[0] in this case) at the correct row.
+
+      TODO: Add a check that the correct record has been fetched by
+      comparing it with the original record.  Take into account that
+      the record on the master and slave can be of different lengths.
+      Something along these lines should work:
+
+      ADD>>>  store_record(table,record[1]);
+              int error= table->file->rnd_pos(table->record[0], table->file->ref);
+      ADD>>>  DBUG_ASSERT(memcmp(table->record[1], table->record[0],
+                                 table->s->reclength) == 0);
+
+    */
+    table->file->position(table->record[0]);
+    int error= table->file->rnd_pos(table->record[0], table->file->ref);
+    /*
+      rnd_pos() returns the record in table->record[0], so we have to
+      move it to table->record[1].
+    */
+    bmove_align(table->record[1], table->record[0], table->s->reclength);
+    DBUG_RETURN(error);
+  }
+
+  /* We need to retrieve all fields */
+  /* TODO: Move this out of this function and into the main loop */
+  table->use_all_columns();
+
+  if (table->s->keys > 0)
+  {
+    int error;
+    /* We have a key: search the table using the index */
+    if (!table->file->inited && (error= table->file->ha_index_init(0, FALSE)))
+      DBUG_RETURN(error);
+
+    /*
+      Don't print debug messages when running valgrind, since they can
+      trigger false warnings.
+    */
+#ifndef HAVE_purify
+    DBUG_DUMP("table->record[0]", table->record[0], table->s->reclength);
+    DBUG_DUMP("table->record[1]", table->record[1], table->s->reclength);
+#endif
+
+    /*
+      We need to set the null bytes to ensure that the filler bits are
+      all set when returning.  There are storage engines that just set
+      the necessary bits on the bytes and don't set the filler bits
+      correctly.
+    */
+    my_ptrdiff_t const pos=
+      table->s->null_bytes > 0 ? table->s->null_bytes - 1 : 0;
+    table->record[1][pos]= 0xFF;
+    if ((error= table->file->index_read_map(table->record[1], key, HA_WHOLE_KEY,
+                                            HA_READ_KEY_EXACT)))
+    {
+      table->file->print_error(error, MYF(0));
+      table->file->ha_index_end();
+      DBUG_RETURN(error);
+    }
+
+    /*
+      Don't print debug messages when running valgrind, since they can
+      trigger false warnings.
+    */
+#ifndef HAVE_purify
+    DBUG_DUMP("table->record[0]", table->record[0], table->s->reclength);
+    DBUG_DUMP("table->record[1]", table->record[1], table->s->reclength);
+#endif
+    /*
+      Below is a minor "optimization".  If the key (i.e., key number
+      0) has the HA_NOSAME flag set, we know that we have found the
+      correct record (since there can be no duplicates); otherwise, we
+      have to compare the record with the one found to see if it is
+      the correct one.
+
+      CAVEAT! This behaviour is essential for the replication of,
+      e.g., the mysql.proc table, since the correct record *shall* be
+      found using the primary key *only*.  There shall be no
+      comparison of non-PK columns to decide if the correct record is
+      found.  I can see no scenario where it would be incorrect to
+      choose the row to change using only a PK or a UNIQUE NOT NULL
+      index.
+    */
+    if (table->key_info->flags & HA_NOSAME)
+    {
+      table->file->ha_index_end();
+      DBUG_RETURN(0);
+    }
+
+    while (record_compare(table))
+    {
+      int error;
+
+      /*
+        We need to set the null bytes to ensure that the filler bits
+        are all set when returning.  There are storage engines that
+        just set the necessary bits on the bytes and don't set the
+        filler bits correctly.
+
+        TODO[record format ndb]: Remove this code once NDB returns the
+        correct record format.
+      */
+      if (table->s->null_bytes > 0)
+      {
+        table->record[1][table->s->null_bytes - 1]|=
+          256U - (1U << table->s->last_null_bit_pos);
+      }
+
+      if ((error= table->file->index_next(table->record[1])))
+      {
+        table->file->print_error(error, MYF(0));
+        table->file->ha_index_end();
+        DBUG_RETURN(error);
+      }
+    }
+
+    /*
+      Have to restart the scan to be able to fetch the next row.
+    */
+    table->file->ha_index_end();
+  }
+  else
+  {
+    int restart_count= 0; // Number of times scanning has restarted from top
+    int error;
+
+    /* We don't have a key: search the table using rnd_next() */
+    if ((error= table->file->ha_rnd_init(1)))
+      return error;
+
+    /* Continue until we find the right record or have made a full loop */
+    do
+    {
+      error= table->file->rnd_next(table->record[1]);
+
+      DBUG_DUMP("record[0]", table->record[0], table->s->reclength);
+      DBUG_DUMP("record[1]", table->record[1], table->s->reclength);
+
+      switch (error) {
+      case 0:
+      case HA_ERR_RECORD_DELETED:
+        break;
+
+      case HA_ERR_END_OF_FILE:
+        if (++restart_count < 2)
+          table->file->ha_rnd_init(1);
+        break;
+
+      default:
+        table->file->print_error(error, MYF(0));
+        DBUG_PRINT("info", ("Record not found"));
+        table->file->ha_rnd_end();
+        DBUG_RETURN(error);
+      }
+    }
+    while (restart_count < 2 && record_compare(table));
+
+    /*
+      Have to restart the scan to be able to fetch the next row.
+    */
+    DBUG_PRINT("info", ("Record %sfound", restart_count == 2 ?
"not " : "")); + table->file->ha_rnd_end(); + + DBUG_ASSERT(error == HA_ERR_END_OF_FILE || error == 0); + DBUG_RETURN(error); + } + + DBUG_RETURN(0); +} + +/********************************************************** + Row handling primitives for Write_rows_log_event_old + **********************************************************/ + +int Write_rows_log_event_old::do_before_row_operations(TABLE *table) +{ + int error= 0; + + /* + We are using REPLACE semantics and not INSERT IGNORE semantics + when writing rows, that is: new rows replace old rows. We need to + inform the storage engine that it should use this behaviour. + */ + + /* Tell the storage engine that we are using REPLACE semantics. */ + thd->lex->duplicates= DUP_REPLACE; + + /* + Pretend we're executing a REPLACE command: this is needed for + InnoDB and NDB Cluster since they are not (properly) checking the + lex->duplicates flag. + */ + thd->lex->sql_command= SQLCOM_REPLACE; + /* + Do not raise the error flag in case of hitting to an unique attribute + */ + table->file->extra(HA_EXTRA_IGNORE_DUP_KEY); + /* + NDB specific: update from ndb master wrapped as Write_rows + */ + /* + so that the event should be applied to replace slave's row + */ + table->file->extra(HA_EXTRA_WRITE_CAN_REPLACE); + /* + NDB specific: if update from ndb master wrapped as Write_rows + does not find the row it's assumed idempotent binlog applying + is taking place; don't raise the error. + */ + table->file->extra(HA_EXTRA_IGNORE_NO_KEY); + /* + TODO: the cluster team (Tomas?) says that it's better if the engine knows + how many rows are going to be inserted, then it can allocate needed memory + from the start. + */ + table->file->ha_start_bulk_insert(0); + /* + We need TIMESTAMP_NO_AUTO_SET otherwise ha_write_row() will not use fill + any TIMESTAMP column with data from the row but instead will use + the event's current time. + As we replicate from TIMESTAMP to TIMESTAMP and slave has no extra + columns, we know that all TIMESTAMP columns on slave will receive explicit + data from the row, so TIMESTAMP_NO_AUTO_SET is ok. + When we allow a table without TIMESTAMP to be replicated to a table having + more columns including a TIMESTAMP column, or when we allow a TIMESTAMP + column to be replicated into a BIGINT column and the slave's table has a + TIMESTAMP column, then the slave's TIMESTAMP column will take its value + from set_time() which we called earlier (consistent with SBR). And then in + some cases we won't want TIMESTAMP_NO_AUTO_SET (will require some code to + analyze if explicit data is provided for slave's TIMESTAMP columns). + */ + table->timestamp_field_type= TIMESTAMP_NO_AUTO_SET; + return error; +} + +int Write_rows_log_event_old::do_after_row_operations(TABLE *table, int error) +{ + int local_error= 0; + table->file->extra(HA_EXTRA_NO_IGNORE_DUP_KEY); + table->file->extra(HA_EXTRA_WRITE_CANNOT_REPLACE); + /* + reseting the extra with + table->file->extra(HA_EXTRA_NO_IGNORE_NO_KEY); + fires bug#27077 + todo: explain or fix + */ + if ((local_error= table->file->ha_end_bulk_insert())) + { + table->file->print_error(local_error, MYF(0)); + } + return error? 
+  return error ? error : local_error;
+}
+
 int
 Write_rows_log_event_old::do_prepare_row(THD *thd_arg,
                                          Relay_log_info const *rli,
@@ -23,6 +981,65 @@ Write_rows_log_event_old::do_prepare_row(THD *thd_arg,
   return error;
 }
 
+int Write_rows_log_event_old::do_exec_row(TABLE *table)
+{
+  DBUG_ASSERT(table != NULL);
+  int error= replace_record(thd, table, m_master_reclength, m_width);
+  return error;
+}
+
+/**********************************************************
+  Row handling primitives for Delete_rows_log_event_old
+ **********************************************************/
+
+int Delete_rows_log_event_old::do_before_row_operations(TABLE *table)
+{
+  DBUG_ASSERT(m_memory == NULL);
+
+  if ((table->file->ha_table_flags() & HA_PRIMARY_KEY_REQUIRED_FOR_POSITION) &&
+      table->s->primary_key < MAX_KEY)
+  {
+    /*
+      We don't need to allocate any memory for m_after_image and
+      m_key since they are not used.
+    */
+    return 0;
+  }
+
+  int error= 0;
+
+  if (table->s->keys > 0)
+  {
+    m_memory= (uchar*) my_multi_malloc(MYF(MY_WME),
+                                       &m_after_image,
+                                       (uint) table->s->reclength,
+                                       &m_key,
+                                       (uint) table->key_info->key_length,
+                                       NullS);
+  }
+  else
+  {
+    m_after_image= (uchar*) my_malloc(table->s->reclength, MYF(MY_WME));
+    m_memory= (uchar*)m_after_image;
+    m_key= NULL;
+  }
+  if (!m_memory)
+    return HA_ERR_OUT_OF_MEM;
+
+  return error;
+}
+
+int Delete_rows_log_event_old::do_after_row_operations(TABLE *table, int error)
+{
+  /*
+    TODO: find out what the return value should really be; this call
+    triggers close_scan in NDB.  Should its error be returned?
+  */
+  table->file->ha_index_or_rnd_end();
+  my_free(m_memory, MYF(MY_ALLOW_ZERO_PTR)); // Free for multi_malloc
+  m_memory= NULL;
+  m_after_image= NULL;
+  m_key= NULL;
+
+  return error;
+}
 
 int
 Delete_rows_log_event_old::do_prepare_row(THD *thd_arg,
@@ -57,6 +1074,67 @@ Delete_rows_log_event_old::do_prepare_row(THD *thd_arg,
   return error;
 }
 
+int Delete_rows_log_event_old::do_exec_row(TABLE *table)
+{
+  int error;
+  DBUG_ASSERT(table != NULL);
+
+  if (!(error= ::find_and_fetch_row(table, m_key)))
+  {
+    /*
+      Now we should have the right row to delete.  We are using
+      record[0] since it is guaranteed to point to a record with the
+      correct value.
+    */
+    error= table->file->ha_delete_row(table->record[0]);
+  }
+  return error;
+}
+
+/**********************************************************
+  Row handling primitives for Update_rows_log_event_old
+ **********************************************************/
+
+int Update_rows_log_event_old::do_before_row_operations(TABLE *table)
+{
+  DBUG_ASSERT(m_memory == NULL);
+
+  int error= 0;
+
+  if (table->s->keys > 0)
+  {
+    m_memory= (uchar*) my_multi_malloc(MYF(MY_WME),
+                                       &m_after_image,
+                                       (uint) table->s->reclength,
+                                       &m_key,
+                                       (uint) table->key_info->key_length,
+                                       NullS);
+  }
+  else
+  {
+    m_after_image= (uchar*) my_malloc(table->s->reclength, MYF(MY_WME));
+    m_memory= m_after_image;
+    m_key= NULL;
+  }
+  if (!m_memory)
+    return HA_ERR_OUT_OF_MEM;
+
+  table->timestamp_field_type= TIMESTAMP_NO_AUTO_SET;
+
+  return error;
+}
+
+int Update_rows_log_event_old::do_after_row_operations(TABLE *table, int error)
+{
+  /*
+    TODO: find out what the return value should really be; this call
+    triggers close_scan in NDB.  Should its error be returned?
+  */
+  table->file->ha_index_or_rnd_end();
+  my_free(m_memory, MYF(MY_ALLOW_ZERO_PTR));
+  m_memory= NULL;
+  m_after_image= NULL;
+  m_key= NULL;
+
+  return error;
+}
 
 int
 Update_rows_log_event_old::do_prepare_row(THD *thd_arg,
                                           Relay_log_info const *rli,
@@ -101,4 +1179,41 @@ int Update_rows_log_event_old::do_prepare_row(THD *thd_arg,
   return error;
 }
 
+int Update_rows_log_event_old::do_exec_row(TABLE *table)
+{
+  DBUG_ASSERT(table != NULL);
+
+  int error= ::find_and_fetch_row(table, m_key);
+  if (error)
+    return error;
+
+  /*
+    We have to ensure that the new record (i.e., the after image) is
+    in record[0] and the old record (i.e., the before image) is in
+    record[1], since some storage engines require this (for
+    example, the partition engine).
+
+    Since find_and_fetch_row() puts the fetched record (i.e., the old
+    record) in record[1], we can keep it there.  We put the new record
+    (i.e., the after image) into record[0], and copy the fields that
+    are on the slave (i.e., in record[1]) into record[0], effectively
+    overwriting the default values that were put there by the
+    unpack_row() function.
+  */
+  bmove_align(table->record[0], m_after_image, table->s->reclength);
+  copy_extra_record_fields(table, m_master_reclength, m_width);
+
+  /*
+    Now we have the right row to update.  The old row (the one we're
+    looking for) is in record[1] and the new row is in record[0].
+    We have also already copied the original values from the slave's
+    database into the after image delivered from the master.
+  */
+  error= table->file->ha_update_row(table->record[1], table->record[0]);
+  if (error == HA_ERR_RECORD_IS_THE_SAME)
+    error= 0;
+
+  return error;
+}
+
 #endif
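The conflict-resolution loop in replace_record() above is the heart of this event-application path: attempt the insert, and on a duplicate-key error fetch the offending row, then either update it in place (only when the duplicate hit the last unique key and no foreign keys reference the table) or delete it and retry the insert. The following self-contained C++ sketch illustrates that control flow only; MockTable, write_row() and replace_row() are hypothetical stand-ins, not the MySQL handler API.

// Sketch only: MockTable stands in for the storage-engine handler API.
#include <cstdio>
#include <map>
#include <string>

struct Row { int pk; std::string payload; };

struct MockTable
{
  std::map<int, std::string> data;   // pk -> payload
  bool last_uniq_key;                // was the duplicate on the last unique key?
  bool referenced_by_foreign_key;    // FK references forbid update-in-place

  // Returns 0 on success, 1 on duplicate key (cf. HA_ERR_FOUND_DUPP_KEY).
  int write_row(const Row &r)
  {
    return data.insert(std::make_pair(r.pk, r.payload)).second ? 0 : 1;
  }
};

// Mirrors the replace_record() loop: insert; on duplicate key, either
// update the conflicting row in place or delete it and retry the insert.
static int replace_row(MockTable &t, const Row &r)
{
  while (t.write_row(r))
  {
    if (t.last_uniq_key && !t.referenced_by_foreign_key)
    {
      t.data[r.pk]= r.payload;       // update-in-place branch
      return 0;
    }
    t.data.erase(r.pk);              // delete the offending row,
  }                                  // then retry the write
  return 0;
}

int main()
{
  MockTable t;
  t.data[1]= "old";
  t.last_uniq_key= false;
  t.referenced_by_foreign_key= false;
  replace_row(t, Row{1, "new"});
  std::printf("pk 1 -> %s\n", t.data[1].c_str());  // prints: pk 1 -> new
  return 0;
}

In the event code above, the two branches correspond to ha_update_row() versus ha_delete_row() followed by another ha_write_row().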