diff options
Diffstat (limited to 'sql')
74 files changed, 4700 insertions, 1439 deletions
diff --git a/sql/Makefile.am b/sql/Makefile.am index f3e2484f9ab..0081417d492 100644 --- a/sql/Makefile.am +++ b/sql/Makefile.am @@ -161,6 +161,7 @@ link_sources: @LN_CP_F@ $(top_srcdir)/sql-common/my_time.c my_time.c rm -f my_user.c @LN_CP_F@ $(top_srcdir)/sql-common/my_user.c my_user.c + echo timestamp > link_sources # This generates lex_hash.h # NOTE Built sources should depend on their sources not the tool diff --git a/sql/event_data_objects.cc b/sql/event_data_objects.cc index adac2b596c1..f4b64ab3012 100644 --- a/sql/event_data_objects.cc +++ b/sql/event_data_objects.cc @@ -1649,8 +1649,6 @@ err: void Event_queue_element::mark_last_executed(THD *thd) { - thd->set_current_time(); - last_executed= (my_time_t) thd->query_start(); last_executed_changed= TRUE; diff --git a/sql/events.cc b/sql/events.cc index 4579fb4d086..87385082a82 100644 --- a/sql/events.cc +++ b/sql/events.cc @@ -146,7 +146,7 @@ bool Events::set_opt_event_scheduler(char *argument) { if (argument == NULL) - opt_event_scheduler= Events::EVENTS_DISABLED; + opt_event_scheduler= Events::EVENTS_ON; else { int type; diff --git a/sql/field.cc b/sql/field.cc index 0ba1e77a4af..52168e713aa 100644 --- a/sql/field.cc +++ b/sql/field.cc @@ -1394,20 +1394,85 @@ int Field::store(const char *to, uint length, CHARSET_INFO *cs, /** + Pack the field into a format suitable for storage and transfer. + + To implement packing functionality, only the virtual function + should be overridden. The other functions are just convenience + functions and hence should not be overridden. + + The value of <code>low_byte_first</code> is dependent on how the + packed data is going to be used: for local use, e.g., temporary + store on disk or in memory, use the native format since that is + faster. For data that is going to be transfered to other machines + (e.g., when writing data to the binary log), data should always be + stored in little-endian format. 
+ + @note The default method for packing fields just copy the raw bytes + of the record into the destination, but never more than + <code>max_length</code> characters. + + @param to + Pointer to memory area where representation of field should be put. + + @param from + Pointer to memory area where record representation of field is + stored. + + @param max_length + Maximum length of the field, as given in the column definition. For + example, for <code>CHAR(1000)</code>, the <code>max_length</code> + is 1000. This information is sometimes needed to decide how to pack + the data. + + @param low_byte_first + @c TRUE if integers should be stored little-endian, @c FALSE if + native format should be used. Note that for little-endian machines, + the value of this flag is a moot point since the native format is + little-endian. +*/ +uchar * +Field::pack(uchar *to, const uchar *from, uint max_length, + bool low_byte_first __attribute__((unused))) +{ + uint32 length= pack_length(); + set_if_smaller(length, max_length); + memcpy(to, from, length); + return to+length; +} + +/** Unpack a field from row data. - This method is used to unpack a field from a master whose size - of the field is less than that of the slave. - + This method is used to unpack a field from a master whose size of + the field is less than that of the slave. + + The <code>param_data</code> parameter is a two-byte integer (stored + in the least significant 16 bits of the unsigned integer) usually + consisting of two parts: the real type in the most significant byte + and a original pack length in the least significant byte. + + The exact layout of the <code>param_data</code> field is given by + the <code>Table_map_log_event::save_field_metadata()</code>. + + This is the default method for unpacking a field. It just copies + the memory block in byte order (of original pack length bytes or + length of field, whichever is smaller). 
+ @param to Destination of the data @param from Source of the data - @param param_data Pack length of the field data + @param param_data Real type and original pack length of the field + data + + @param low_byte_first + If this flag is @c true, all composite entities (e.g., lengths) + should be unpacked in little-endian format; otherwise, the entities + are unpacked in native order. @return New pointer into memory based on from + length of the data */ -const uchar *Field::unpack(uchar* to, - const uchar *from, - uint param_data) +const uchar * +Field::unpack(uchar* to, const uchar *from, uint param_data, + bool low_byte_first __attribute__((unused))) { uint length=pack_length(); int from_type= 0; @@ -1420,19 +1485,18 @@ const uchar *Field::unpack(uchar* to, from_type= (param_data & 0xff00) >> 8U; // real_type. param_data= param_data & 0x00ff; // length. } + + if ((param_data == 0) || + (length == param_data) || + (from_type != real_type())) + { + memcpy(to, from, length); + return from+length; + } + uint len= (param_data && (param_data < length)) ? param_data : length; - /* - If the length is the same, use old unpack method. - If the param_data is 0, use the old unpack method. - This is possible if the table map was generated from a down-level - master or if the data was not available on the master. - If the real_types are not the same, use the old unpack method. - */ - if ((length == param_data) || - (param_data == 0) || - (from_type != real_type())) - return(unpack(to, from)); + memcpy(to, from, param_data > length ? 
length : len); return from+len; } @@ -2814,10 +2878,15 @@ uint Field_new_decimal::is_equal(Create_field *new_field) @return New pointer into memory based on from + length of the data */ -const uchar *Field_new_decimal::unpack(uchar* to, - const uchar *from, - uint param_data) +const uchar * +Field_new_decimal::unpack(uchar* to, + const uchar *from, + uint param_data, + bool low_byte_first) { + if (param_data == 0) + return Field::unpack(to, from, param_data, low_byte_first); + uint from_precision= (param_data & 0xff00) >> 8U; uint from_decimal= param_data & 0x00ff; uint length=pack_length(); @@ -3959,6 +4028,49 @@ void Field_longlong::sql_type(String &res) const } +/* + Floating-point numbers + */ + +uchar * +Field_real::pack(uchar *to, const uchar *from, + uint max_length, bool low_byte_first) +{ + DBUG_ENTER("Field_real::pack"); + DBUG_ASSERT(max_length >= pack_length()); + DBUG_PRINT("debug", ("pack_length(): %u", pack_length())); +#ifdef WORDS_BIGENDIAN + if (low_byte_first != table->s->db_low_byte_first) + { + const uchar *dptr= from + pack_length(); + while (dptr-- > from) + *to++ = *dptr; + DBUG_RETURN(to); + } + else +#endif + DBUG_RETURN(Field::pack(to, from, max_length, low_byte_first)); +} + +const uchar * +Field_real::unpack(uchar *to, const uchar *from, + uint param_data, bool low_byte_first) +{ + DBUG_ENTER("Field_real::unpack"); + DBUG_PRINT("debug", ("pack_length(): %u", pack_length())); +#ifdef WORDS_BIGENDIAN + if (low_byte_first != table->s->db_low_byte_first) + { + const uchar *dptr= from + pack_length(); + while (dptr-- > from) + *to++ = *dptr; + DBUG_RETURN(from + pack_length()); + } + else +#endif + DBUG_RETURN(Field::unpack(to, from, param_data, low_byte_first)); +} + /**************************************************************************** single precision float ****************************************************************************/ @@ -5596,6 +5708,9 @@ void Field_date::sql_type(String &res) const 1 Value was cut during 
conversion 2 Wrong date string 3 Datetime value that was cut (warning level NOTE) + This is used by opt_range.cc:get_mm_leaf(). Note that there is a + nearly-identical class Field_date doesn't ever return 3 from its + store function. */ int Field_newdate::store(const char *from,uint len,CHARSET_INFO *cs) @@ -6296,24 +6411,74 @@ int Field_str::store(double nr) ASSERT_COLUMN_MARKED_FOR_WRITE; char buff[DOUBLE_TO_STRING_CONVERSION_BUFFER_SIZE]; uint length; - bool use_scientific_notation= TRUE; uint local_char_length= field_length / charset()->mbmaxlen; - /* - Check fabs(nr) against longest value that can be stored in field, - which depends on whether the value is < 1 or not, and negative or not - */ double anr= fabs(nr); + bool fractional= (anr != floor(anr)); int neg= (nr < 0.0) ? 1 : 0; - if (local_char_length > 4 && local_char_length < 32 && - (anr < 1.0 ? anr > 1/(log_10[max(0,(int) local_char_length-neg-2)]) /* -2 for "0." */ - : anr < log_10[local_char_length-neg]-1)) - use_scientific_notation= FALSE; - - length= (uint) my_sprintf(buff, (buff, "%-.*g", - (use_scientific_notation ? - max(0, (int)local_char_length-neg-5) : - local_char_length), - nr)); + uint max_length; + int exp; + uint digits; + uint i; + + /* Calculate the exponent from the 'e'-format conversion */ + if (anr < 1.0 && anr > 0) + { + for (exp= 0; anr < 1e-100; exp-= 100, anr*= 1e100); + for (; anr < 1e-10; exp-= 10, anr*= 1e10); + for (i= 1; anr < 1 / log_10[i]; exp--, i++); + exp--; + } + else + { + for (exp= 0; anr > 1e100; exp+= 100, anr/= 1e100); + for (; anr > 1e10; exp+= 10, anr/= 1e10); + for (i= 1; anr > log_10[i]; exp++, i++); + } + + max_length= local_char_length - neg; + + /* + Since in sprintf("%g") precision means the number of significant digits, + calculate the maximum number of significant digits if the 'f'-format + would be used (+1 for decimal point if the number has a fractional part). 
+ */ + digits= max(0, (int) max_length - fractional); + /* + If the exponent is negative, decrease digits by the number of leading zeros + after the decimal point that do not count as significant digits. + */ + if (exp < 0) + digits= max(0, (int) digits + exp); + /* + 'e'-format is used only if the exponent is less than -4 or greater than or + equal to the precision. In this case we need to adjust the number of + significant digits to take "e+NN" + decimal point into account (hence -5). + We also have to reserve one additional character if abs(exp) >= 100. + */ + if (exp >= (int) digits || exp < -4) + digits= max(0, (int) (max_length - 5 - (exp >= 100 || exp <= -100))); + + /* Limit precision to DBL_DIG to avoid garbage past significant digits */ + set_if_smaller(digits, DBL_DIG); + + length= (uint) my_sprintf(buff, (buff, "%-.*g", digits, nr)); + +#ifdef __WIN__ + /* + Windows always zero-pads the exponent to 3 digits, we want to remove the + leading 0 to match the sprintf() output on other platforms. + */ + if ((exp >= (int) digits || exp < -4) && exp > -100 && exp < 100) + { + DBUG_ASSERT(length >= 6); /* 1e+NNN */ + uint tmp= length - 3; + buff[tmp]= buff[tmp + 1]; + tmp++; + buff[tmp]= buff[tmp + 1]; + length--; + } +#endif + /* +1 below is because "precision" in %g above means the max. number of significant digits, not the output width. @@ -6374,6 +6539,11 @@ int Field_longstr::store_decimal(const my_decimal *d) return store(str.ptr(), str.length(), str.charset()); } +uint32 Field_longstr::max_data_length() const +{ + return field_length + (field_length > 255 ? 
2 : 1); +} + double Field_string::val_real(void) { @@ -6518,7 +6688,9 @@ void Field_string::sql_type(String &res) const } -uchar *Field_string::pack(uchar *to, const uchar *from, uint max_length) +uchar *Field_string::pack(uchar *to, const uchar *from, + uint max_length, + bool low_byte_first __attribute__((unused))) { uint length= min(field_length,max_length); uint local_char_length= max_length/field_charset->mbmaxlen; @@ -6526,11 +6698,15 @@ uchar *Field_string::pack(uchar *to, const uchar *from, uint max_length) local_char_length= my_charpos(field_charset, from, from+length, local_char_length); set_if_smaller(length, local_char_length); - while (length && from[length-1] == ' ') + while (length && from[length-1] == field_charset->pad_char) length--; + + // Length always stored little-endian *to++= (uchar) length; if (field_length > 255) *to++= (uchar) (length >> 8); + + // Store the actual bytes of the string memcpy(to, from, length); return to+length; } @@ -6552,34 +6728,27 @@ uchar *Field_string::pack(uchar *to, const uchar *from, uint max_length) @return New pointer into memory based on from + length of the data */ -const uchar *Field_string::unpack(uchar *to, - const uchar *from, - uint param_data) -{ - uint from_len= param_data & 0x00ff; // length. - uint length= 0; - uint f_length; - f_length= (from_len < field_length) ? from_len : field_length; - DBUG_ASSERT(f_length <= 255); - length= (uint) *from++; - bitmap_set_bit(table->write_set,field_index); - store((const char *)from, length, system_charset_info); - return from+length; -} - - -const uchar *Field_string::unpack(uchar *to, const uchar *from) -{ +const uchar * +Field_string::unpack(uchar *to, + const uchar *from, + uint param_data, + bool low_byte_first __attribute__((unused))) +{ + uint from_length= + param_data ? 
min(param_data & 0x00ff, field_length) : field_length; uint length; - if (field_length > 255) + + if (from_length > 255) { length= uint2korr(from); from+= 2; } else length= (uint) *from++; - memcpy(to, from, (int) length); - bfill(to+length, field_length - length, ' '); + + memcpy(to, from, length); + // Pad the string with the pad character of the fields charset + bfill(to + length, field_length - length, field_charset->pad_char); return from+length; } @@ -6769,6 +6938,7 @@ const uint Field_varstring::MAX_SIZE= UINT_MAX16; int Field_varstring::do_save_field_metadata(uchar *metadata_ptr) { char *ptr= (char *)metadata_ptr; + DBUG_ASSERT(field_length <= 65535); int2store(ptr, field_length); return 2; } @@ -6996,22 +7166,30 @@ uint32 Field_varstring::data_length() Here the number of length bytes are depending on the given max_length */ -uchar *Field_varstring::pack(uchar *to, const uchar *from, uint max_length) +uchar *Field_varstring::pack(uchar *to, const uchar *from, + uint max_length, + bool low_byte_first __attribute__((unused))) { uint length= length_bytes == 1 ? (uint) *from : uint2korr(from); set_if_smaller(max_length, field_length); if (length > max_length) length=max_length; - *to++= (char) (length & 255); + + /* Length always stored little-endian */ + *to++= length & 0xFF; if (max_length > 255) - *to++= (char) (length >> 8); - if (length) + *to++= (length >> 8) & 0xFF; + + /* Store bytes of string */ + if (length > 0) memcpy(to, from+length_bytes, length); return to+length; } -uchar *Field_varstring::pack_key(uchar *to, const uchar *key, uint max_length) +uchar * +Field_varstring::pack_key(uchar *to, const uchar *key, uint max_length, + bool low_byte_first __attribute__((unused))) { uint length= length_bytes == 1 ? (uint) *key : uint2korr(key); uint local_char_length= ((field_charset->mbmaxlen > 1) ? 
@@ -7050,8 +7228,9 @@ uchar *Field_varstring::pack_key(uchar *to, const uchar *key, uint max_length) Pointer to end of 'key' (To the next key part if multi-segment key) */ -const uchar *Field_varstring::unpack_key(uchar *to, const uchar *key, - uint max_length) +const uchar * +Field_varstring::unpack_key(uchar *to, const uchar *key, uint max_length, + bool low_byte_first __attribute__((unused))) { /* get length of the blob key */ uint32 length= *key++; @@ -7080,8 +7259,9 @@ const uchar *Field_varstring::unpack_key(uchar *to, const uchar *key, end of key storage */ -uchar *Field_varstring::pack_key_from_key_image(uchar *to, const uchar *from, - uint max_length) +uchar * +Field_varstring::pack_key_from_key_image(uchar *to, const uchar *from, uint max_length, + bool low_byte_first __attribute__((unused))) { /* Key length is always stored as 2 bytes */ uint length= uint2korr(from); @@ -7101,6 +7281,9 @@ uchar *Field_varstring::pack_key_from_key_image(uchar *to, const uchar *from, This method is used to unpack a varstring field from a master whose size of the field is less than that of the slave. + + @note + The string length is always packed little-endian. @param to Destination of the data @param from Source of the data @@ -7108,9 +7291,10 @@ uchar *Field_varstring::pack_key_from_key_image(uchar *to, const uchar *from, @return New pointer into memory based on from + length of the data */ -const uchar *Field_varstring::unpack(uchar *to, - const uchar *from, - uint param_data) +const uchar * +Field_varstring::unpack(uchar *to, const uchar *from, + uint param_data, + bool low_byte_first __attribute__((unused))) { uint length; uint l_bytes= (param_data && (param_data < field_length)) ? 
@@ -7122,28 +7306,7 @@ const uchar *Field_varstring::unpack(uchar *to, if (length_bytes == 2) to[1]= 0; } - else - { - length= uint2korr(from); - to[0]= *from++; - to[1]= *from++; - } - if (length) - memcpy(to+ length_bytes, from, length); - return from+length; -} - - -/* - unpack field packed with Field_varstring::pack() -*/ - -const uchar *Field_varstring::unpack(uchar *to, const uchar *from) -{ - uint length; - if (length_bytes == 1) - length= (uint) (*to= *from++); - else + else /* l_bytes == 2 */ { length= uint2korr(from); to[0]= *from++; @@ -7392,9 +7555,9 @@ void Field_blob::store_length(uchar *i_ptr, } -uint32 Field_blob::get_length(const uchar *pos, bool low_byte_first) +uint32 Field_blob::get_length(const uchar *pos, uint packlength_arg, bool low_byte_first) { - switch (packlength) { + switch (packlength_arg) { case 1: return (uint32) pos[0]; case 2: @@ -7825,26 +7988,37 @@ void Field_blob::sql_type(String &res) const } } - -uchar *Field_blob::pack(uchar *to, const uchar *from, uint max_length) +uchar *Field_blob::pack(uchar *to, const uchar *from, + uint max_length, bool low_byte_first) { + DBUG_ENTER("Field_blob::pack"); + DBUG_PRINT("enter", ("to: 0x%lx; from: 0x%lx;" + " max_length: %u; low_byte_first: %d", + (ulong) to, (ulong) from, + max_length, low_byte_first)); + DBUG_DUMP("record", from, table->s->reclength); uchar *save= ptr; ptr= (uchar*) from; uint32 length=get_length(); // Length of from string - if (length > max_length) - { - length=max_length; - store_length(to,packlength,length,TRUE); - } - else - memcpy(to,from,packlength); // Copy length - if (length) + + /* + Store max length, which will occupy packlength bytes. If the max + length given is smaller than the actual length of the blob, we + just store the initial bytes of the blob. + */ + store_length(to, packlength, min(length, max_length), low_byte_first); + + /* + Store the actual blob data, which will occupy 'length' bytes. 
+ */ + if (length > 0) { get_ptr((uchar**) &from); memcpy(to+packlength, from,length); } ptr=save; // Restore org row pointer - return to+packlength+length; + DBUG_DUMP("packed", to, packlength + length); + DBUG_RETURN(to+packlength+length); } @@ -7859,28 +8033,30 @@ uchar *Field_blob::pack(uchar *to, const uchar *from, uint max_length) @param to Destination of the data @param from Source of the data - @param param_data not used + @param param_data @c TRUE if base types should be stored in little- + endian format, @c FALSE if native format should + be used. @return New pointer into memory based on from + length of the data */ const uchar *Field_blob::unpack(uchar *to, const uchar *from, - uint param_data) -{ - return unpack(to, from); -} - - -const uchar *Field_blob::unpack(uchar *to, const uchar *from) -{ - uint32 length=get_length(from); - memcpy(to,from,packlength); - from+=packlength; - if (length) - memcpy_fixed(to+packlength, &from, sizeof(from)); - else - bzero(to+packlength,sizeof(from)); - return from+length; + uint param_data, + bool low_byte_first) +{ + DBUG_ENTER("Field_blob::unpack"); + DBUG_PRINT("enter", ("to: 0x%lx; from: 0x%lx;" + " param_data: %u; low_byte_first: %d", + (ulong) to, (ulong) from, param_data, low_byte_first)); + uint const master_packlength= + param_data > 0 ? 
param_data & 0xFF : packlength; + uint32 const length= get_length(from, master_packlength, low_byte_first); + DBUG_DUMP("packed", from, length + master_packlength); + bitmap_set_bit(table->write_set, field_index); + store(reinterpret_cast<const char*>(from) + master_packlength, + length, field_charset); + DBUG_DUMP("record", to, table->s->reclength); + DBUG_RETURN(from + master_packlength + length); } /* Keys for blobs are like keys on varchars */ @@ -7930,7 +8106,9 @@ int Field_blob::pack_cmp(const uchar *b, uint key_length_arg, /* Create a packed key that will be used for storage from a MySQL row */ -uchar *Field_blob::pack_key(uchar *to, const uchar *from, uint max_length) +uchar * +Field_blob::pack_key(uchar *to, const uchar *from, uint max_length, + bool low_byte_first __attribute__((unused))) { uchar *save= ptr; ptr= (uchar*) from; @@ -7975,8 +8153,9 @@ uchar *Field_blob::pack_key(uchar *to, const uchar *from, uint max_length) Pointer into 'from' past the last byte copied from packed key. 
*/ -const uchar *Field_blob::unpack_key(uchar *to, const uchar *from, - uint max_length) +const uchar * +Field_blob::unpack_key(uchar *to, const uchar *from, uint max_length, + bool low_byte_first __attribute__((unused))) { /* get length of the blob key */ uint32 length= *from++; @@ -7999,8 +8178,9 @@ const uchar *Field_blob::unpack_key(uchar *to, const uchar *from, /* Create a packed key that will be used for storage from a MySQL key */ -uchar *Field_blob::pack_key_from_key_image(uchar *to, const uchar *from, - uint max_length) +uchar * +Field_blob::pack_key_from_key_image(uchar *to, const uchar *from, uint max_length, + bool low_byte_first __attribute__((unused))) { uint length=uint2korr(from); if (length > max_length) @@ -8947,9 +9127,11 @@ void Field_bit::sql_type(String &res) const } -uchar *Field_bit::pack(uchar *to, const uchar *from, uint max_length) +uchar * +Field_bit::pack(uchar *to, const uchar *from, uint max_length, + bool low_byte_first __attribute__((unused))) { - DBUG_ASSERT(max_length); + DBUG_ASSERT(max_length > 0); uint length; if (bit_len > 0) { @@ -8984,28 +9166,44 @@ uchar *Field_bit::pack(uchar *to, const uchar *from, uint max_length) /** Unpack a bit field from row data. - This method is used to unpack a bit field from a master whose size + This method is used to unpack a bit field from a master whose size of the field is less than that of the slave. - + @param to Destination of the data @param from Source of the data @param param_data Bit length (upper) and length (lower) values @return New pointer into memory based on from + length of the data */ -const uchar *Field_bit::unpack(uchar *to, - const uchar *from, - uint param_data) +const uchar * +Field_bit::unpack(uchar *to, const uchar *from, uint param_data, + bool low_byte_first __attribute__((unused))) { uint const from_len= (param_data >> 8U) & 0x00ff; uint const from_bit_len= param_data & 0x00ff; /* - If the master and slave have the same sizes, then use the old - unpack() method. 
+ If the parameter data is zero (i.e., undefined), or if the master + and slave have the same sizes, then use the old unpack() method. */ - if ((from_bit_len == bit_len) && - (from_len == bytes_in_rec)) - return(unpack(to, from)); + if (param_data == 0 || + (from_bit_len == bit_len) && (from_len == bytes_in_rec)) + { + if (bit_len > 0) + { + /* + set_rec_bits is a macro, don't put the post-increment in the + argument since that might cause strange side-effects. + + For the choice of the second argument, see the explanation for + Field_bit::pack(). + */ + set_rec_bits(*from, bit_ptr + (to - ptr), bit_ofs, bit_len); + from++; + } + memcpy(to, from, bytes_in_rec); + return from + bytes_in_rec; + } + /* We are converting a smaller bit field to a larger one here. To do that, we first need to construct a raw value for the original @@ -9033,25 +9231,6 @@ const uchar *Field_bit::unpack(uchar *to, } -const uchar *Field_bit::unpack(uchar *to, const uchar *from) -{ - if (bit_len > 0) - { - /* - set_rec_bits is a macro, don't put the post-increment in the - argument since that might cause strange side-effects. - - For the choice of the second argument, see the explanation for - Field_bit::pack(). - */ - set_rec_bits(*from, bit_ptr + (to - ptr), bit_ofs, bit_len); - from++; - } - memcpy(to, from, bytes_in_rec); - return from + bytes_in_rec; -} - - void Field_bit::set_default() { if (bit_len > 0) diff --git a/sql/field.h b/sql/field.h index 2a381065f63..27be2601270 100644 --- a/sql/field.h +++ b/sql/field.h @@ -176,6 +176,17 @@ public: */ virtual uint32 data_length() { return pack_length(); } virtual uint32 sort_length() const { return pack_length(); } + + /** + Get the maximum size of the data in packed format. + + @return Maximum data length of the field when packed using the + Field::pack() function. 
+ */ + virtual uint32 max_data_length() const { + return pack_length(); + }; + virtual int reset(void) { bzero(ptr,pack_length()); return 0; } virtual void reset_fields() {} virtual void set_default() @@ -358,32 +369,45 @@ public: return str; } virtual bool send_binary(Protocol *protocol); - virtual uchar *pack(uchar *to, const uchar *from, uint max_length=~(uint) 0) + + virtual uchar *pack(uchar *to, const uchar *from, + uint max_length, bool low_byte_first); + /** + @overload Field::pack(uchar*, const uchar*, uint, bool) + */ + uchar *pack(uchar *to, const uchar *from) { - uint32 length=pack_length(); - memcpy(to,from,length); - return to+length; + DBUG_ENTER("Field::pack"); + uchar *result= this->pack(to, from, UINT_MAX, table->s->db_low_byte_first); + DBUG_RETURN(result); } - virtual const uchar *unpack(uchar* to, const uchar *from, uint param_data); - virtual const uchar *unpack(uchar* to, const uchar *from) + + virtual const uchar *unpack(uchar* to, const uchar *from, + uint param_data, bool low_byte_first); + /** + @overload Field::unpack(uchar*, const uchar*, uint, bool) + */ + const uchar *unpack(uchar* to, const uchar *from) { - uint length=pack_length(); - memcpy(to,from,length); - return from+length; + DBUG_ENTER("Field::unpack"); + const uchar *result= unpack(to, from, 0U, table->s->db_low_byte_first); + DBUG_RETURN(result); } - virtual uchar *pack_key(uchar* to, const uchar *from, uint max_length) + + virtual uchar *pack_key(uchar* to, const uchar *from, + uint max_length, bool low_byte_first) { - return pack(to,from,max_length); + return pack(to, from, max_length, low_byte_first); } virtual uchar *pack_key_from_key_image(uchar* to, const uchar *from, - uint max_length) + uint max_length, bool low_byte_first) { - return pack(to,from,max_length); + return pack(to, from, max_length, low_byte_first); } virtual const uchar *unpack_key(uchar* to, const uchar *from, - uint max_length) + uint max_length, bool low_byte_first) { - return unpack(to,from); + 
return unpack(to, from, max_length, low_byte_first); } virtual uint packed_col_length(const uchar *to, uint length) { return length;} @@ -568,6 +592,7 @@ public: {} int store_decimal(const my_decimal *d); + uint32 max_data_length() const; }; /* base class for float and double and decimal (old one) */ @@ -588,6 +613,10 @@ public: int truncate(double *nr, double max_length); uint32 max_display_length() { return field_length; } uint size_of() const { return sizeof(*this); } + virtual const uchar *unpack(uchar* to, const uchar *from, + uint param_data, bool low_byte_first); + virtual uchar *pack(uchar* to, const uchar *from, + uint max_length, bool low_byte_first); }; @@ -616,6 +645,16 @@ public: void overflow(bool negative); bool zero_pack() const { return 0; } void sql_type(String &str) const; + virtual const uchar *unpack(uchar* to, const uchar *from, + uint param_data, bool low_byte_first) + { + return Field::unpack(to, from, param_data, low_byte_first); + } + virtual uchar *pack(uchar* to, const uchar *from, + uint max_length, bool low_byte_first) + { + return Field::pack(to, from, max_length, low_byte_first); + } }; @@ -666,7 +705,8 @@ public: uint row_pack_length() { return pack_length(); } int compatible_field_size(uint field_metadata); uint is_equal(Create_field *new_field); - virtual const uchar *unpack(uchar* to, const uchar *from, uint param_data); + virtual const uchar *unpack(uchar* to, const uchar *from, + uint param_data, bool low_byte_first); }; @@ -697,6 +737,20 @@ public: uint32 pack_length() const { return 1; } void sql_type(String &str) const; uint32 max_display_length() { return 4; } + + virtual uchar *pack(uchar* to, const uchar *from, + uint max_length, bool low_byte_first) + { + *to= *from; + return to + 1; + } + + virtual const uchar *unpack(uchar* to, const uchar *from, + uint param_data, bool low_byte_first) + { + *to= *from; + return from + 1; + } }; @@ -732,8 +786,47 @@ public: uint32 pack_length() const { return 2; } void sql_type(String 
&str) const; uint32 max_display_length() { return 6; } -}; + virtual uchar *pack(uchar* to, const uchar *from, + uint max_length, bool low_byte_first) + { + int16 val; +#ifdef WORDS_BIGENDIAN + if (table->s->db_low_byte_first) + val = sint2korr(from); + else +#endif + shortget(val, from); + +#ifdef WORDS_BIGENDIAN + if (low_byte_first) + int2store(to, val); + else +#endif + shortstore(to, val); + return to + sizeof(val); + } + + virtual const uchar *unpack(uchar* to, const uchar *from, + uint param_data, bool low_byte_first) + { + int16 val; +#ifdef WORDS_BIGENDIAN + if (low_byte_first) + val = sint2korr(from); + else +#endif + shortget(val, from); + +#ifdef WORDS_BIGENDIAN + if (table->s->db_low_byte_first) + int2store(to, val); + else +#endif + shortstore(to, val); + return from + sizeof(val); + } +}; class Field_medium :public Field_num { public: @@ -762,6 +855,18 @@ public: uint32 pack_length() const { return 3; } void sql_type(String &str) const; uint32 max_display_length() { return 8; } + + virtual uchar *pack(uchar* to, const uchar *from, + uint max_length, bool low_byte_first) + { + return Field::pack(to, from, max_length, low_byte_first); + } + + virtual const uchar *unpack(uchar* to, const uchar *from, + uint param_data, bool low_byte_first) + { + return Field::unpack(to, from, param_data, low_byte_first); + } }; @@ -797,6 +902,45 @@ public: uint32 pack_length() const { return 4; } void sql_type(String &str) const; uint32 max_display_length() { return MY_INT32_NUM_DECIMAL_DIGITS; } + virtual uchar *pack(uchar* to, const uchar *from, + uint max_length, bool low_byte_first) + { + int32 val; +#ifdef WORDS_BIGENDIAN + if (table->s->db_low_byte_first) + val = sint4korr(from); + else +#endif + longget(val, from); + +#ifdef WORDS_BIGENDIAN + if (low_byte_first) + int4store(to, val); + else +#endif + longstore(to, val); + return to + sizeof(val); + } + + virtual const uchar *unpack(uchar* to, const uchar *from, + uint param_data, bool low_byte_first) + { + int32 
val; +#ifdef WORDS_BIGENDIAN + if (low_byte_first) + val = sint4korr(from); + else +#endif + longget(val, from); + +#ifdef WORDS_BIGENDIAN + if (table->s->db_low_byte_first) + int4store(to, val); + else +#endif + longstore(to, val); + return from + sizeof(val); + } }; @@ -839,6 +983,45 @@ public: void sql_type(String &str) const; bool can_be_compared_as_longlong() const { return TRUE; } uint32 max_display_length() { return 20; } + virtual uchar *pack(uchar* to, const uchar *from, + uint max_length, bool low_byte_first) + { + int64 val; +#ifdef WORDS_BIGENDIAN + if (table->s->db_low_byte_first) + val = sint8korr(from); + else +#endif + longlongget(val, from); + +#ifdef WORDS_BIGENDIAN + if (low_byte_first) + int8store(to, val); + else +#endif + longlongstore(to, val); + return to + sizeof(val); + } + + virtual const uchar *unpack(uchar* to, const uchar *from, + uint param_data, bool low_byte_first) + { + int64 val; +#ifdef WORDS_BIGENDIAN + if (low_byte_first) + val = sint8korr(from); + else +#endif + longlongget(val, from); + +#ifdef WORDS_BIGENDIAN + if (table->s->db_low_byte_first) + int8store(to, val); + else +#endif + longlongstore(to, val); + return from + sizeof(val); + } }; #endif @@ -1220,9 +1403,10 @@ public: int cmp(const uchar *,const uchar *); void sort_string(uchar *buff,uint length); void sql_type(String &str) const; - uchar *pack(uchar *to, const uchar *from, uint max_length=~(uint) 0); - virtual const uchar *unpack(uchar* to, const uchar *from, uint param_data); - const uchar *unpack(uchar* to, const uchar *from); + virtual uchar *pack(uchar *to, const uchar *from, + uint max_length, bool low_byte_first); + virtual const uchar *unpack(uchar* to, const uchar *from, + uint param_data, bool low_byte_first); uint pack_length_from_metadata(uint field_metadata) { return (field_metadata & 0x00ff); } uint row_pack_length() { return (field_length + 1); } @@ -1300,13 +1484,15 @@ public: uint get_key_image(uchar *buff,uint length, imagetype type); void 
set_key_image(const uchar *buff,uint length); void sql_type(String &str) const; - uchar *pack(uchar *to, const uchar *from, uint max_length=~(uint) 0); - uchar *pack_key(uchar *to, const uchar *from, uint max_length); + virtual uchar *pack(uchar *to, const uchar *from, + uint max_length, bool low_byte_first); + uchar *pack_key(uchar *to, const uchar *from, uint max_length, bool low_byte_first); uchar *pack_key_from_key_image(uchar* to, const uchar *from, - uint max_length); - virtual const uchar *unpack(uchar* to, const uchar *from, uint param_data); - const uchar *unpack(uchar* to, const uchar *from); - const uchar *unpack_key(uchar* to, const uchar *from, uint max_length); + uint max_length, bool low_byte_first); + virtual const uchar *unpack(uchar* to, const uchar *from, + uint param_data, bool low_byte_first); + const uchar *unpack_key(uchar* to, const uchar *from, + uint max_length, bool low_byte_first); int pack_cmp(const uchar *a, const uchar *b, uint key_length, my_bool insert_or_update); int pack_cmp(const uchar *b, uint key_length,my_bool insert_or_update); @@ -1399,7 +1585,7 @@ public: { return (uint32) (packlength); } uint row_pack_length() { return pack_length_no_ptr(); } uint32 sort_length() const; - inline uint32 max_data_length() const + virtual uint32 max_data_length() const { return (uint32) (((ulonglong) 1 << (packlength*8)) -1); } @@ -1427,13 +1613,13 @@ public: @returns The length in the row plus the size of the data. 
*/ uint32 get_packed_size(const uchar *ptr_arg, bool low_byte_first) - {return packlength + get_length(ptr_arg, low_byte_first);} + {return packlength + get_length(ptr_arg, packlength, low_byte_first);} inline uint32 get_length(uint row_offset= 0) - { return get_length(ptr+row_offset, table->s->db_low_byte_first); } - uint32 get_length(const uchar *ptr, bool low_byte_first); + { return get_length(ptr+row_offset, this->packlength, table->s->db_low_byte_first); } + uint32 get_length(const uchar *ptr, uint packlength, bool low_byte_first); uint32 get_length(const uchar *ptr_arg) - { return get_length(ptr_arg, table->s->db_low_byte_first); } + { return get_length(ptr_arg, this->packlength, table->s->db_low_byte_first); } void put_length(uchar *pos, uint32 length); inline void get_ptr(uchar **str) { @@ -1474,13 +1660,16 @@ public: memcpy_fixed(ptr+packlength,&tmp,sizeof(char*)); return 0; } - uchar *pack(uchar *to, const uchar *from, uint max_length= ~(uint) 0); - uchar *pack_key(uchar *to, const uchar *from, uint max_length); + virtual uchar *pack(uchar *to, const uchar *from, + uint max_length, bool low_byte_first); + uchar *pack_key(uchar *to, const uchar *from, + uint max_length, bool low_byte_first); uchar *pack_key_from_key_image(uchar* to, const uchar *from, - uint max_length); - virtual const uchar *unpack(uchar *to, const uchar *from, uint param_data); - const uchar *unpack(uchar *to, const uchar *from); - const uchar *unpack_key(uchar* to, const uchar *from, uint max_length); + uint max_length, bool low_byte_first); + virtual const uchar *unpack(uchar *to, const uchar *from, + uint param_data, bool low_byte_first); + const uchar *unpack_key(uchar* to, const uchar *from, + uint max_length, bool low_byte_first); int pack_cmp(const uchar *a, const uchar *b, uint key_length, my_bool insert_or_update); int pack_cmp(const uchar *b, uint key_length,my_bool insert_or_update); @@ -1632,6 +1821,7 @@ public: enum_field_types type() const { return MYSQL_TYPE_BIT; } enum 
ha_base_keytype key_type() const { return HA_KEYTYPE_BIT; } uint32 key_length() const { return (uint32) (field_length + 7) / 8; } + uint32 max_data_length() const { return (field_length + 7) / 8; } uint32 max_display_length() { return field_length; } uint size_of() const { return sizeof(*this); } Item_result result_type () const { return INT_RESULT; } @@ -1673,9 +1863,10 @@ public: { return (bytes_in_rec + ((bit_len > 0) ? 1 : 0)); } int compatible_field_size(uint field_metadata); void sql_type(String &str) const; - uchar *pack(uchar *to, const uchar *from, uint max_length=~(uint) 0); - virtual const uchar *unpack(uchar *to, const uchar *from, uint param_data); - const uchar *unpack(uchar* to, const uchar *from); + virtual uchar *pack(uchar *to, const uchar *from, + uint max_length, bool low_byte_first); + virtual const uchar *unpack(uchar *to, const uchar *from, + uint param_data, bool low_byte_first); virtual void set_default(); Field *new_key_field(MEM_ROOT *root, struct st_table *new_table, diff --git a/sql/ha_ndbcluster_binlog.cc b/sql/ha_ndbcluster_binlog.cc index 55af0c38aed..be75eff2575 100644 --- a/sql/ha_ndbcluster_binlog.cc +++ b/sql/ha_ndbcluster_binlog.cc @@ -321,7 +321,7 @@ ndbcluster_binlog_open_table(THD *thd, NDB_SHARE *share, DBUG_ENTER("ndbcluster_binlog_open_table"); safe_mutex_assert_owner(&LOCK_open); - init_tmp_table_share(table_share, share->db, 0, share->table_name, + init_tmp_table_share(thd, table_share, share->db, 0, share->table_name, share->key); if ((error= open_table_def(thd, table_share, 0))) { diff --git a/sql/ha_partition.cc b/sql/ha_partition.cc index 7234222b79f..d251e056c3e 100644 --- a/sql/ha_partition.cc +++ b/sql/ha_partition.cc @@ -1531,6 +1531,14 @@ int ha_partition::copy_partitions(ulonglong *copied, ulonglong *deleted) longlong func_value; DBUG_ENTER("ha_partition::copy_partitions"); + if (m_part_info->linear_hash_ind) + { + if (m_part_info->part_type == HASH_PARTITION) + set_linear_hash_mask(m_part_info, 
m_part_info->no_parts); + else + set_linear_hash_mask(m_part_info, m_part_info->no_subparts); + } + while (reorg_part < m_reorged_parts) { handler *file= m_reorged_file[reorg_part]; @@ -1598,7 +1606,11 @@ error: void ha_partition::update_create_info(HA_CREATE_INFO *create_info) { - m_file[0]->update_create_info(create_info); + info(HA_STATUS_AUTO); + + if (!(create_info->used_fields & HA_CREATE_USED_AUTO)) + create_info->auto_increment_value= stats.auto_increment_value; + create_info->data_file_name= create_info->index_file_name = NULL; return; } @@ -3841,7 +3853,7 @@ int ha_partition::read_range_first(const key_range *start_key, start_key->key, start_key->keypart_map, start_key->flag); } - DBUG_RETURN (error? error: compare_key(end_range) <= 0 ? 0 : HA_ERR_END_OF_FILE); + DBUG_RETURN(error); } @@ -3986,7 +3998,8 @@ int ha_partition::handle_unordered_next(uchar *buf, bool is_next_same) } else if (!(error= file->index_next(buf))) { - if (compare_key(end_range) <= 0) + if (!(file->table_flags() & HA_READ_ORDER) || + compare_key(end_range) <= 0) { m_last_part= m_part_spec.start_part; DBUG_RETURN(0); // Row was in range @@ -4063,7 +4076,8 @@ int ha_partition::handle_unordered_scan_next_partition(uchar * buf) } if (!error) { - if (compare_key(end_range) <= 0) + if (!(file->table_flags() & HA_READ_ORDER) || + compare_key(end_range) <= 0) { m_last_part= i; DBUG_RETURN(0); diff --git a/sql/handler.cc b/sql/handler.cc index 8a2355c8a87..a4926071598 100644 --- a/sql/handler.cc +++ b/sql/handler.cc @@ -2643,7 +2643,7 @@ int ha_create_table(THD *thd, const char *path, TABLE_SHARE share; DBUG_ENTER("ha_create_table"); - init_tmp_table_share(&share, db, 0, table_name, path); + init_tmp_table_share(thd, &share, db, 0, table_name, path); if (open_table_def(thd, &share, 0) || open_table_from_share(thd, &share, "", 0, (uint) READ_ALL, 0, &table, TRUE)) @@ -2709,7 +2709,7 @@ int ha_create_table_from_engine(THD* thd, const char *db, const char *name) if (error) DBUG_RETURN(2); - 
init_tmp_table_share(&share, db, 0, name, path); + init_tmp_table_share(thd, &share, db, 0, name, path); if (open_table_def(thd, &share, 0)) { DBUG_RETURN(3); @@ -3717,11 +3717,12 @@ int handler::ha_reset() int handler::ha_write_row(uchar *buf) { int error; + DBUG_ENTER("handler::ha_write_row"); if (unlikely(error= write_row(buf))) - return error; + DBUG_RETURN(error); if (unlikely(error= binlog_log_row<Write_rows_log_event>(table, 0, buf))) - return error; - return 0; + DBUG_RETURN(error); /* purecov: inspected */ + DBUG_RETURN(0); } diff --git a/sql/handler.h b/sql/handler.h index b91d8a39b88..140b44704a9 100644 --- a/sql/handler.h +++ b/sql/handler.h @@ -1637,13 +1637,22 @@ public: virtual int repair_partitions(THD *thd) { return HA_ERR_WRONG_COMMAND; } - /* lock_count() can be more than one if the table is a MERGE */ + /** + @note lock_count() can return > 1 if the table is MERGE or partitioned. + */ virtual uint lock_count(void) const { return 1; } /** Is not invoked for non-transactional temporary tables. + @note store_lock() can return more than one lock if the table is MERGE + or partitioned. + @note that one can NOT rely on table->in_use in store_lock(). It may refer to a different thread if called from mysql_lock_abort_for_thread(). + + @note If the table is MERGE, store_lock() can return less locks + than lock_count() claimed. This can happen when the MERGE children + are not attached when this is called from another thread. */ virtual THR_LOCK_DATA **store_lock(THD *thd, THR_LOCK_DATA **to, diff --git a/sql/item.cc b/sql/item.cc index 108dddc8939..4d579597d21 100644 --- a/sql/item.cc +++ b/sql/item.cc @@ -1240,7 +1240,17 @@ bool Item_name_const::is_null() Item::Type Item_name_const::type() const { - return value_item->type(); + /* + As + 1. one can try to create the Item_name_const passing non-constant + arguments, although it's incorrect and + 2. 
the type() method can be called before the fix_fields() to get + type information for a further type cast, e.g. + if (item->type() == FIELD_ITEM) + ((Item_field *) item)->... + we return NULL_ITEM in the case to avoid wrong casting. + */ + return valid_args ? value_item->type() : NULL_ITEM; } @@ -1252,14 +1262,14 @@ bool Item_name_const::fix_fields(THD *thd, Item **ref) s.length(0); if (value_item->fix_fields(thd, &value_item) || - name_item->fix_fields(thd, &name_item)) - return TRUE; - if (!(value_item->const_item() && name_item->const_item())) + name_item->fix_fields(thd, &name_item) || + !value_item->const_item() || + !name_item->const_item() || + !(item_name= name_item->val_str(&s))) // Can't have a NULL name + { + my_error(ER_RESERVED_SYNTAX, MYF(0), "NAME_CONST"); return TRUE; - - if (!(item_name= name_item->val_str(&s))) - return TRUE; /* Can't have a NULL name */ - + } set_name(item_name->ptr(), (uint) item_name->length(), system_charset_info); max_length= value_item->max_length; decimals= value_item->decimals; diff --git a/sql/item.h b/sql/item.h index 7693a2ffec7..b98389bc8d4 100644 --- a/sql/item.h +++ b/sql/item.h @@ -1242,11 +1242,13 @@ class Item_name_const : public Item { Item *value_item; Item *name_item; + bool valid_args; public: Item_name_const(Item *name_arg, Item *val): value_item(val), name_item(name_arg) { - if(!value_item->basic_const_item()) + if (!(valid_args= name_item->basic_const_item() & + value_item->basic_const_item())) my_error(ER_WRONG_ARGUMENTS, MYF(0), "NAME_CONST"); Item::maybe_null= TRUE; } @@ -2627,10 +2629,19 @@ protected: For all other uses of Item_cache, cached_field doesn't matter. 
*/ Field *cached_field; + enum enum_field_types cached_field_type; public: - Item_cache(): example(0), used_table_map(0), cached_field(0) + Item_cache(): + example(0), used_table_map(0), cached_field(0), cached_field_type(MYSQL_TYPE_STRING) { - fixed= 1; null_value= 1; + fixed= 1; + null_value= 1; + } + Item_cache(enum_field_types field_type_arg): + example(0), used_table_map(0), cached_field(0), cached_field_type(field_type_arg) + { + fixed= 1; + null_value= 1; } void set_used_tables(table_map map) { used_table_map= map; } @@ -2649,6 +2660,7 @@ public: }; virtual void store(Item *)= 0; enum Type type() const { return CACHE_ITEM; } + enum_field_types field_type() const { return cached_field_type; } static Item_cache* get_cache(const Item *item); table_map used_tables() const { return used_table_map; } virtual void keep_array() {} @@ -2672,6 +2684,8 @@ protected: longlong value; public: Item_cache_int(): Item_cache(), value(0) {} + Item_cache_int(enum_field_types field_type_arg): + Item_cache(field_type_arg), value(0) {} void store(Item *item); void store(Item *item, longlong val_arg); diff --git a/sql/item_cmpfunc.cc b/sql/item_cmpfunc.cc index 235cce21fb1..1d1e28593d4 100644 --- a/sql/item_cmpfunc.cc +++ b/sql/item_cmpfunc.cc @@ -24,7 +24,8 @@ #include <m_ctype.h> #include "sql_select.h" -static bool convert_constant_item(THD *thd, Field *field, Item **item); +static bool convert_constant_item(THD *thd, Item_field *field_item, + Item **item); static Item_result item_store_type(Item_result a, Item *item, my_bool unsigned_flag) @@ -351,7 +352,7 @@ longlong Item_func_nop_all::val_int() SYNOPSIS convert_constant_item() thd thread handle - field item will be converted using the type of this field + field_item item will be converted using the type of this field item [in/out] reference to the item to convert DESCRIPTION @@ -374,8 +375,10 @@ longlong Item_func_nop_all::val_int() 1 Item was replaced with an integer version of the item */ -static bool 
convert_constant_item(THD *thd, Field *field, Item **item) +static bool convert_constant_item(THD *thd, Item_field *field_item, + Item **item) { + Field *field= field_item->field; int result= 0; if (!(*item)->with_subselect && (*item)->const_item()) @@ -385,9 +388,11 @@ static bool convert_constant_item(THD *thd, Field *field, Item **item) enum_check_fields orig_count_cuted_fields= thd->count_cuted_fields; my_bitmap_map *old_write_map; my_bitmap_map *old_read_map; + ulonglong orig_field_val; /* original field value if valid */ LINT_INIT(old_write_map); LINT_INIT(old_read_map); + LINT_INIT(orig_field_val); if (table) { @@ -398,6 +403,13 @@ static bool convert_constant_item(THD *thd, Field *field, Item **item) thd->variables.sql_mode= (orig_sql_mode & ~MODE_NO_ZERO_DATE) | MODE_INVALID_DATES; thd->count_cuted_fields= CHECK_FIELD_IGNORE; + + /* + Store the value of the field if it references an outer field because + the call to save_in_field below overrides that value. + */ + if (field_item->depended_from) + orig_field_val= field->val_int(); if (!(*item)->is_null() && !(*item)->save_in_field(field, 1)) { Item *tmp= new Item_int_with_ref(field->val_int(), *item, @@ -406,6 +418,13 @@ static bool convert_constant_item(THD *thd, Field *field, Item **item) thd->change_item_tree(item, tmp); result= 1; // Item was replaced } + /* Restore the original field value. */ + if (field_item->depended_from) + { + result= field->store(orig_field_val, TRUE); + /* orig_field_val must be a valid value that can be restored back. 
*/ + DBUG_ASSERT(!result); + } thd->variables.sql_mode= orig_sql_mode; thd->count_cuted_fields= orig_count_cuted_fields; if (table) @@ -462,15 +481,14 @@ void Item_bool_func2::fix_length_and_dec() thd= current_thd; if (!thd->is_context_analysis_only()) { - Item *arg_real_item= args[0]->real_item(); - if (arg_real_item->type() == FIELD_ITEM) + if (args[0]->real_item()->type() == FIELD_ITEM) { - Field *field=((Item_field*) arg_real_item)->field; - if (field->can_be_compared_as_longlong() && - !(arg_real_item->is_datetime() && + Item_field *field_item= (Item_field*) (args[0]->real_item()); + if (field_item->field->can_be_compared_as_longlong() && + !(field_item->is_datetime() && args[1]->result_type() == STRING_RESULT)) { - if (convert_constant_item(thd, field,&args[1])) + if (convert_constant_item(thd, field_item, &args[1])) { cmp.set_cmp_func(this, tmp_arg, tmp_arg+1, INT_RESULT); // Works for all types. @@ -479,15 +497,14 @@ void Item_bool_func2::fix_length_and_dec() } } } - arg_real_item= args[1]->real_item(); - if (arg_real_item->type() == FIELD_ITEM) + if (args[1]->real_item()->type() == FIELD_ITEM) { - Field *field=((Item_field*) arg_real_item)->field; - if (field->can_be_compared_as_longlong() && - !(arg_real_item->is_datetime() && + Item_field *field_item= (Item_field*) (args[1]->real_item()); + if (field_item->field->can_be_compared_as_longlong() && + !(field_item->is_datetime() && args[0]->result_type() == STRING_RESULT)) { - if (convert_constant_item(thd, field,&args[0])) + if (convert_constant_item(thd, field_item, &args[0])) { cmp.set_cmp_func(this, tmp_arg, tmp_arg+1, INT_RESULT); // Works for all types. @@ -933,12 +950,15 @@ get_datetime_value(THD *thd, Item ***item_arg, Item **cache_arg, { value= item->val_int(); *is_null= item->null_value; + enum_field_types f_type= item->field_type(); /* Item_date_add_interval may return MYSQL_TYPE_STRING as the result field type. 
To detect that the DATE value has been returned we - compare it with 1000000L - any DATE value should be less than it. + compare it with 100000000L - any DATE value should be less than it. + Don't shift cached DATETIME values up for the second time. */ - if (item->field_type() == MYSQL_TYPE_DATE || value < 100000000L) + if (f_type == MYSQL_TYPE_DATE || + (f_type != MYSQL_TYPE_DATETIME && value < 100000000L)) value*= 1000000L; } else @@ -975,7 +995,7 @@ get_datetime_value(THD *thd, Item ***item_arg, Item **cache_arg, if (item->const_item() && cache_arg && (item->type() != Item::FUNC_ITEM || ((Item_func*)item)->functype() != Item_func::GUSERVAR_FUNC)) { - Item_cache_int *cache= new Item_cache_int(); + Item_cache_int *cache= new Item_cache_int(MYSQL_TYPE_DATETIME); /* Mark the cache as non-const to prevent re-caching. */ cache->set_used_tables(1); cache->store(item, value); @@ -1691,26 +1711,29 @@ bool Item_func_opt_neg::eq(const Item *item, bool binary_cmp) const void Item_func_interval::fix_length_and_dec() { + uint rows= row->cols(); + use_decimal_comparison= ((row->element_index(0)->result_type() == DECIMAL_RESULT) || (row->element_index(0)->result_type() == INT_RESULT)); - if (row->cols() > 8) + if (rows > 8) { - bool consts=1; + bool not_null_consts= TRUE; - for (uint i=1 ; consts && i < row->cols() ; i++) + for (uint i= 1; not_null_consts && i < rows; i++) { - consts&= row->element_index(i)->const_item(); + Item *el= row->element_index(i); + not_null_consts&= el->const_item() & !el->is_null(); } - if (consts && + if (not_null_consts && (intervals= - (interval_range*) sql_alloc(sizeof(interval_range)*(row->cols()-1)))) + (interval_range*) sql_alloc(sizeof(interval_range) * (rows - 1)))) { if (use_decimal_comparison) { - for (uint i=1 ; i < row->cols(); i++) + for (uint i= 1; i < rows; i++) { Item *el= row->element_index(i); interval_range *range= intervals + (i-1); @@ -1735,7 +1758,7 @@ void Item_func_interval::fix_length_and_dec() } else { - for (uint i=1 ; i < 
row->cols(); i++) + for (uint i= 1; i < rows; i++) { intervals[i-1].dbl= row->element_index(i)->val_real(); } @@ -1826,12 +1849,22 @@ longlong Item_func_interval::val_int() ((el->result_type() == DECIMAL_RESULT) || (el->result_type() == INT_RESULT))) { - my_decimal e_dec_buf, *e_dec= row->element_index(i)->val_decimal(&e_dec_buf); + my_decimal e_dec_buf, *e_dec= el->val_decimal(&e_dec_buf); + /* Skip NULL ranges. */ + if (el->null_value) + continue; if (my_decimal_cmp(e_dec, dec) > 0) - return i-1; + return i - 1; + } + else + { + double val= el->val_real(); + /* Skip NULL ranges. */ + if (el->null_value) + continue; + if (val > value) + return i - 1; } - else if (row->element_index(i)->val_real() > value) - return i-1; } return i-1; } @@ -1943,16 +1976,16 @@ void Item_func_between::fix_length_and_dec() thd->lex->sql_command != SQLCOM_CREATE_VIEW && thd->lex->sql_command != SQLCOM_SHOW_CREATE) { - Field *field=((Item_field*) (args[0]->real_item()))->field; - if (field->can_be_compared_as_longlong()) + Item_field *field_item= (Item_field*) (args[0]->real_item()); + if (field_item->field->can_be_compared_as_longlong()) { /* The following can't be recoded with || as convert_constant_item changes the argument */ - if (convert_constant_item(thd, field,&args[1])) + if (convert_constant_item(thd, field_item, &args[1])) cmp_type=INT_RESULT; // Works for all types. - if (convert_constant_item(thd, field,&args[2])) + if (convert_constant_item(thd, field_item, &args[2])) cmp_type=INT_RESULT; // Works for all types. 
} } @@ -2610,6 +2643,23 @@ bool Item_func_case::fix_fields(THD *thd, Item **ref) } +void Item_func_case::agg_str_lengths(Item* arg) +{ + set_if_bigger(max_length, arg->max_length); + set_if_bigger(decimals, arg->decimals); + unsigned_flag= unsigned_flag && arg->unsigned_flag; +} + + +void Item_func_case::agg_num_lengths(Item *arg) +{ + uint len= my_decimal_length_to_precision(arg->max_length, arg->decimals, + arg->unsigned_flag) - arg->decimals; + set_if_bigger(max_length, len); + set_if_bigger(decimals, arg->decimals); + unsigned_flag= unsigned_flag && arg->unsigned_flag; +} + void Item_func_case::fix_length_and_dec() { @@ -2673,15 +2723,22 @@ void Item_func_case::fix_length_and_dec() max_length=0; decimals=0; - for (uint i=0 ; i < ncases ; i+=2) + unsigned_flag= TRUE; + if (cached_result_type == STRING_RESULT) { - set_if_bigger(max_length,args[i+1]->max_length); - set_if_bigger(decimals,args[i+1]->decimals); + for (uint i= 0; i < ncases; i+= 2) + agg_str_lengths(args[i + 1]); + if (else_expr_num != -1) + agg_str_lengths(args[else_expr_num]); } - if (else_expr_num != -1) + else { - set_if_bigger(max_length,args[else_expr_num]->max_length); - set_if_bigger(decimals,args[else_expr_num]->decimals); + for (uint i= 0; i < ncases; i+= 2) + agg_num_lengths(args[i + 1]); + if (else_expr_num != -1) + agg_num_lengths(args[else_expr_num]); + max_length= my_decimal_precision_to_length(max_length + decimals, decimals, + unsigned_flag); } } @@ -2885,7 +2942,7 @@ static inline int cmp_ulongs (ulonglong a_val, ulonglong b_val) SYNOPSIS cmp_longlong() - cmp_arg an argument passed to the calling function (qsort2) + cmp_arg an argument passed to the calling function (my_qsort2) a left argument b right argument @@ -3563,13 +3620,13 @@ void Item_func_in::fix_length_and_dec() thd->lex->sql_command != SQLCOM_SHOW_CREATE && cmp_type != INT_RESULT) { - Field *field= ((Item_field*) (args[0]->real_item()))->field; - if (field->can_be_compared_as_longlong()) + Item_field *field_item= 
(Item_field*) (args[0]->real_item()); + if (field_item->field->can_be_compared_as_longlong()) { bool all_converted= TRUE; for (arg=args+1, arg_end=args+arg_count; arg != arg_end ; arg++) { - if (!convert_constant_item (thd, field, &arg[0])) + if (!convert_constant_item (thd, field_item, &arg[0])) all_converted= FALSE; } if (all_converted) @@ -4394,6 +4451,51 @@ void Item_func_like::cleanup() #ifdef USE_REGEX bool +Item_func_regex::regcomp(bool send_error) +{ + char buff[MAX_FIELD_WIDTH]; + String tmp(buff,sizeof(buff),&my_charset_bin); + String *res= args[1]->val_str(&tmp); + int error; + + if (args[1]->null_value) + return TRUE; + + if (regex_compiled) + { + if (!stringcmp(res, &prev_regexp)) + return FALSE; + prev_regexp.copy(*res); + my_regfree(&preg); + regex_compiled= 0; + } + + if (cmp_collation.collation != regex_lib_charset) + { + /* Convert UCS2 strings to UTF8 */ + uint dummy_errors; + if (conv.copy(res->ptr(), res->length(), res->charset(), + regex_lib_charset, &dummy_errors)) + return TRUE; + res= &conv; + } + + if ((error= my_regcomp(&preg, res->c_ptr_safe(), + regex_lib_flags, regex_lib_charset))) + { + if (send_error) + { + (void) my_regerror(error, &preg, buff, sizeof(buff)); + my_error(ER_REGEXP_ERROR, MYF(0), buff); + } + return TRUE; + } + regex_compiled= 1; + return FALSE; +} + + +bool Item_func_regex::fix_fields(THD *thd, Item **ref) { DBUG_ASSERT(fixed == 0); @@ -4409,35 +4511,34 @@ Item_func_regex::fix_fields(THD *thd, Item **ref) if (agg_arg_charsets(cmp_collation, args, 2, MY_COLL_CMP_CONV, 1)) return TRUE; + regex_lib_flags= (cmp_collation.collation->state & + (MY_CS_BINSORT | MY_CS_CSSORT)) ? + REG_EXTENDED | REG_NOSUB : + REG_EXTENDED | REG_NOSUB | REG_ICASE; + /* + If the case of UCS2 and other non-ASCII character sets, + we will convert patterns and strings to UTF8. + */ + regex_lib_charset= (cmp_collation.collation->mbminlen > 1) ? 
+ &my_charset_utf8_general_ci : + cmp_collation.collation; + used_tables_cache=args[0]->used_tables() | args[1]->used_tables(); not_null_tables_cache= (args[0]->not_null_tables() | args[1]->not_null_tables()); const_item_cache=args[0]->const_item() && args[1]->const_item(); if (!regex_compiled && args[1]->const_item()) { - char buff[MAX_FIELD_WIDTH]; - String tmp(buff,sizeof(buff),&my_charset_bin); - String *res=args[1]->val_str(&tmp); if (args[1]->null_value) { // Will always return NULL maybe_null=1; fixed= 1; return FALSE; } - int error; - if ((error= my_regcomp(&preg,res->c_ptr(), - ((cmp_collation.collation->state & - (MY_CS_BINSORT | MY_CS_CSSORT)) ? - REG_EXTENDED | REG_NOSUB : - REG_EXTENDED | REG_NOSUB | REG_ICASE), - cmp_collation.collation))) - { - (void) my_regerror(error,&preg,buff,sizeof(buff)); - my_error(ER_REGEXP_ERROR, MYF(0), buff); + if (regcomp(TRUE)) return TRUE; - } - regex_compiled=regex_is_const=1; - maybe_null=args[0]->maybe_null; + regex_is_const= 1; + maybe_null= args[0]->maybe_null; } else maybe_null=1; @@ -4450,47 +4551,25 @@ longlong Item_func_regex::val_int() { DBUG_ASSERT(fixed == 1); char buff[MAX_FIELD_WIDTH]; - String *res, tmp(buff,sizeof(buff),&my_charset_bin); + String tmp(buff,sizeof(buff),&my_charset_bin); + String *res= args[0]->val_str(&tmp); - res=args[0]->val_str(&tmp); - if (args[0]->null_value) - { - null_value=1; + if ((null_value= (args[0]->null_value || + (!regex_is_const && regcomp(FALSE))))) return 0; - } - if (!regex_is_const) - { - char buff2[MAX_FIELD_WIDTH]; - String *res2, tmp2(buff2,sizeof(buff2),&my_charset_bin); - res2= args[1]->val_str(&tmp2); - if (args[1]->null_value) + if (cmp_collation.collation != regex_lib_charset) + { + /* Convert UCS2 strings to UTF8 */ + uint dummy_errors; + if (conv.copy(res->ptr(), res->length(), res->charset(), + regex_lib_charset, &dummy_errors)) { - null_value=1; + null_value= 1; return 0; } - if (!regex_compiled || stringcmp(res2,&prev_regexp)) - { - 
prev_regexp.copy(*res2); - if (regex_compiled) - { - my_regfree(&preg); - regex_compiled=0; - } - if (my_regcomp(&preg,res2->c_ptr_safe(), - ((cmp_collation.collation->state & - (MY_CS_BINSORT | MY_CS_CSSORT)) ? - REG_EXTENDED | REG_NOSUB : - REG_EXTENDED | REG_NOSUB | REG_ICASE), - cmp_collation.collation)) - { - null_value=1; - return 0; - } - regex_compiled=1; - } + res= &conv; } - null_value=0; return my_regexec(&preg,res->c_ptr_safe(),0,(my_regmatch_t*) 0,0) ? 0 : 1; } diff --git a/sql/item_cmpfunc.h b/sql/item_cmpfunc.h index e9aeef7fc3e..188d87a69ca 100644 --- a/sql/item_cmpfunc.h +++ b/sql/item_cmpfunc.h @@ -742,7 +742,7 @@ public: virtual uchar *get_value(Item *item)=0; void sort() { - qsort2(base,used_count,size,compare,collation); + my_qsort2(base,used_count,size,compare,collation); } int find(Item *item); @@ -1145,6 +1145,8 @@ public: Item *find_item(String *str); CHARSET_INFO *compare_collation() { return cmp_collation.collation; } void cleanup(); + void agg_str_lengths(Item *arg); + void agg_num_lengths(Item *arg); }; /* @@ -1382,6 +1384,10 @@ class Item_func_regex :public Item_bool_func bool regex_is_const; String prev_regexp; DTCollation cmp_collation; + CHARSET_INFO *regex_lib_charset; + int regex_lib_flags; + String conv; + bool regcomp(bool send_error); public: Item_func_regex(Item *a,Item *b) :Item_bool_func(a,b), regex_compiled(0),regex_is_const(0) {} diff --git a/sql/item_func.cc b/sql/item_func.cc index e2551979202..c89474f46b3 100644 --- a/sql/item_func.cc +++ b/sql/item_func.cc @@ -3727,13 +3727,12 @@ longlong Item_func_sleep::val_int() break; error= 0; } - + pthread_mutex_unlock(&LOCK_user_locks); pthread_mutex_lock(&thd->mysys_var->mutex); thd->mysys_var->current_mutex= 0; thd->mysys_var->current_cond= 0; pthread_mutex_unlock(&thd->mysys_var->mutex); - pthread_mutex_unlock(&LOCK_user_locks); pthread_cond_destroy(&cond); return test(!error); // Return 1 killed @@ -3848,9 +3847,12 @@ Item_func_set_user_var::fix_length_and_dec() bool 
Item_func_set_user_var::register_field_in_read_map(uchar *arg) { - TABLE *table= (TABLE *) arg; - if (result_field->table == table || !table) - bitmap_set_bit(result_field->table->read_set, result_field->field_index); + if (result_field) + { + TABLE *table= (TABLE *) arg; + if (result_field->table == table || !table) + bitmap_set_bit(result_field->table->read_set, result_field->field_index); + } return 0; } diff --git a/sql/item_func.h b/sql/item_func.h index 87894810b7f..e09b584de95 100644 --- a/sql/item_func.h +++ b/sql/item_func.h @@ -789,7 +789,7 @@ public: max_length= args[0]->max_length; decimals=args[0]->decimals; /* The item could be a NULL constant. */ - null_value= args[0]->null_value; + null_value= args[0]->is_null(); } }; diff --git a/sql/item_strfunc.cc b/sql/item_strfunc.cc index 03c65c9d654..a7624c5bbcd 100644 --- a/sql/item_strfunc.cc +++ b/sql/item_strfunc.cc @@ -356,10 +356,35 @@ String *Item_func_concat::val_str(String *str) } else { // Two big const strings - if (tmp_value.alloc(max_length) || - tmp_value.copy(*res) || - tmp_value.append(*res2)) + /* + NOTE: We should be prudent in the initial allocation unit -- the + size of the arguments is a function of data distribution, which + can be any. Instead of overcommitting at the first row, we grow + the allocated amount by the factor of 2. This ensures that no + more than 25% of memory will be overcommitted on average. 
+ */ + + uint concat_len= res->length() + res2->length(); + + if (tmp_value.alloced_length() < concat_len) + { + if (tmp_value.alloced_length() == 0) + { + if (tmp_value.alloc(concat_len)) + goto null; + } + else + { + uint new_len = max(tmp_value.alloced_length() * 2, concat_len); + + if (tmp_value.realloc(new_len)) + goto null; + } + } + + if (tmp_value.copy(*res) || tmp_value.append(*res2)) goto null; + res= &tmp_value; use_as_buff=str; } @@ -679,8 +704,33 @@ String *Item_func_concat_ws::val_str(String *str) } else { // Two big const strings - if (tmp_value.alloc(max_length) || - tmp_value.copy(*res) || + /* + NOTE: We should be prudent in the initial allocation unit -- the + size of the arguments is a function of data distribution, which can + be any. Instead of overcommitting at the first row, we grow the + allocated amount by the factor of 2. This ensures that no more than + 25% of memory will be overcommitted on average. + */ + + uint concat_len= res->length() + sep_str->length() + res2->length(); + + if (tmp_value.alloced_length() < concat_len) + { + if (tmp_value.alloced_length() == 0) + { + if (tmp_value.alloc(concat_len)) + goto null; + } + else + { + uint new_len = max(tmp_value.alloced_length() * 2, concat_len); + + if (tmp_value.realloc(new_len)) + goto null; + } + } + + if (tmp_value.copy(*res) || tmp_value.append(*sep_str) || tmp_value.append(*res2)) goto null; diff --git a/sql/item_strfunc.h b/sql/item_strfunc.h index 3608568bd5e..0d7cd66acff 100644 --- a/sql/item_strfunc.h +++ b/sql/item_strfunc.h @@ -657,7 +657,12 @@ public: } String* val_str(String* str); const char *func_name() const { return "inet_ntoa"; } - void fix_length_and_dec() { decimals = 0; max_length=3*8+7; maybe_null=1;} + void fix_length_and_dec() + { + decimals= 0; + max_length= 3 * 8 + 7; + maybe_null= 1; + } }; class Item_func_quote :public Item_str_func diff --git a/sql/item_sum.cc b/sql/item_sum.cc index a7574cf4d60..733b6e1e460 100644 --- a/sql/item_sum.cc +++ 
b/sql/item_sum.cc @@ -2849,45 +2849,51 @@ String *Item_sum_udf_str::val_str(String *str) concat of values from "group by" operation BUGS - DISTINCT and ORDER BY only works if ORDER BY uses all fields and only fields - in expression list Blobs doesn't work with DISTINCT or ORDER BY *****************************************************************************/ -/* - function of sort for syntax: - GROUP_CONCAT(DISTINCT expr,...) + + +/** + Compares the values for fields in expr list of GROUP_CONCAT. + @note + + GROUP_CONCAT([DISTINCT] expr [,expr ...] + [ORDER BY {unsigned_integer | col_name | expr} + [ASC | DESC] [,col_name ...]] + [SEPARATOR str_val]) + + @return + @retval -1 : key1 < key2 + @retval 0 : key1 = key2 + @retval 1 : key1 > key2 */ -int group_concat_key_cmp_with_distinct(void* arg, uchar* key1, - uchar* key2) +int group_concat_key_cmp_with_distinct(void* arg, const void* key1, + const void* key2) { - Item_func_group_concat* grp_item= (Item_func_group_concat*)arg; - TABLE *table= grp_item->table; - Item **field_item, **end; + Item_func_group_concat *item_func= (Item_func_group_concat*)arg; + TABLE *table= item_func->table; - for (field_item= grp_item->args, end= field_item + grp_item->arg_count_field; - field_item < end; - field_item++) + for (uint i= 0; i < item_func->arg_count_field; i++) { + Item *item= item_func->args[i]; + /* + If field_item is a const item then either get_tp_table_field returns 0 + or it is an item over a const table. + */ + if (item->const_item()) + continue; /* We have to use get_tmp_table_field() instead of real_item()->get_tmp_table_field() because we want the field in the temporary table, not the original field */ - Field *field= (*field_item)->get_tmp_table_field(); - /* - If field_item is a const item then either get_tmp_table_field returns 0 - or it is an item over a const table. 
- */ - if (field && !(*field_item)->const_item()) - { - int res; - uint offset= (field->offset(field->table->record[0]) - - table->s->null_bytes); - if ((res= field->cmp(key1 + offset, key2 + offset))) - return res; - } + Field *field= item->get_tmp_table_field(); + int res; + uint offset= field->offset(field->table->record[0])-table->s->null_bytes; + if((res= field->cmp((uchar*)key1 + offset, (uchar*)key2 + offset))) + return res; } return 0; } @@ -2898,7 +2904,8 @@ int group_concat_key_cmp_with_distinct(void* arg, uchar* key1, GROUP_CONCAT(expr,... ORDER BY col,... ) */ -int group_concat_key_cmp_with_order(void* arg, uchar* key1, uchar* key2) +int group_concat_key_cmp_with_order(void* arg, const void* key1, + const void* key2) { Item_func_group_concat* grp_item= (Item_func_group_concat*) arg; ORDER **order_item, **end; @@ -2924,7 +2931,7 @@ int group_concat_key_cmp_with_order(void* arg, uchar* key1, uchar* key2) int res; uint offset= (field->offset(field->table->record[0]) - table->s->null_bytes); - if ((res= field->cmp(key1 + offset, key2 + offset))) + if ((res= field->cmp((uchar*)key1 + offset, (uchar*)key2 + offset))) return (*order_item)->asc ? res : -res; } } @@ -2938,25 +2945,6 @@ int group_concat_key_cmp_with_order(void* arg, uchar* key1, uchar* key2) /* - function of sort for syntax: - GROUP_CONCAT(DISTINCT expr,... ORDER BY col,... 
) - - BUG: - This doesn't work in the case when the order by contains data that - is not part of the field list because tree-insert will not notice - the duplicated values when inserting things sorted by ORDER BY -*/ - -int group_concat_key_cmp_with_distinct_and_order(void* arg,uchar* key1, - uchar* key2) -{ - if (!group_concat_key_cmp_with_distinct(arg,key1,key2)) - return 0; - return(group_concat_key_cmp_with_order(arg,key1,key2)); -} - - -/* Append data from current leaf to item->result */ @@ -3041,7 +3029,7 @@ Item_func_group_concat(Name_resolution_context *context_arg, bool distinct_arg, List<Item> *select_list, SQL_LIST *order_list, String *separator_arg) :tmp_table_param(0), warning(0), - separator(separator_arg), tree(0), table(0), + separator(separator_arg), tree(0), unique_filter(NULL), table(0), order(0), context(context_arg), arg_count_order(order_list ? order_list->elements : 0), arg_count_field(select_list->elements), @@ -3096,6 +3084,7 @@ Item_func_group_concat::Item_func_group_concat(THD *thd, warning(item->warning), separator(item->separator), tree(item->tree), + unique_filter(item->unique_filter), table(item->table), order(item->order), context(item->context), @@ -3146,6 +3135,11 @@ void Item_func_group_concat::cleanup() delete_tree(tree); tree= 0; } + if (unique_filter) + { + delete unique_filter; + unique_filter= NULL; + } if (warning) { char warn_buff[MYSQL_ERRMSG_SIZE]; @@ -3175,6 +3169,8 @@ void Item_func_group_concat::clear() no_appended= TRUE; if (tree) reset_tree(tree); + if (distinct) + unique_filter->reset(); /* No need to reset the table as we never call write_row */ } @@ -3198,9 +3194,19 @@ bool Item_func_group_concat::add() } null_value= FALSE; + bool row_eligible= TRUE; + + if (distinct) + { + /* Filter out duplicate rows. 
*/ + uint count= unique_filter->elements_in_tree(); + unique_filter->unique_add(table->record[0] + table->s->null_bytes); + if (count == unique_filter->elements_in_tree()) + row_eligible= FALSE; + } TREE_ELEMENT *el= 0; // Only for safety - if (tree) + if (row_eligible && tree) el= tree_insert(tree, table->record[0] + table->s->null_bytes, 0, tree->custom_arg); /* @@ -3208,7 +3214,7 @@ bool Item_func_group_concat::add() we can dump the row here in case of GROUP_CONCAT(DISTINCT...) instead of doing tree traverse later. */ - if (!warning_for_row && + if (row_eligible && !warning_for_row && (!tree || (el->count == 1 && distinct && !arg_count_order))) dump_leaf_key(table->record[0] + table->s->null_bytes, 1, this); @@ -3284,7 +3290,6 @@ bool Item_func_group_concat::setup(THD *thd) { List<Item> list; SELECT_LEX *select_lex= thd->lex->current_select; - qsort_cmp2 compare_key; DBUG_ENTER("Item_func_group_concat::setup"); /* @@ -3374,38 +3379,33 @@ bool Item_func_group_concat::setup(THD *thd) table->file->extra(HA_EXTRA_NO_ROWS); table->no_rows= 1; + /* + Need sorting or uniqueness: init tree and choose a function to sort. + Don't reserve space for NULLs: if any of gconcat arguments is NULL, + the row is not added to the result. + */ + uint tree_key_length= table->s->reclength - table->s->null_bytes; - if (distinct || arg_count_order) + if (arg_count_order) { - /* - Need sorting: init tree and choose a function to sort. - Don't reserve space for NULLs: if any of gconcat arguments is NULL, - the row is not added to the result. - */ - uint tree_key_length= table->s->reclength - table->s->null_bytes; - tree= &tree_base; - if (arg_count_order) - { - if (distinct) - compare_key= (qsort_cmp2) group_concat_key_cmp_with_distinct_and_order; - else - compare_key= (qsort_cmp2) group_concat_key_cmp_with_order; - } - else - { - compare_key= (qsort_cmp2) group_concat_key_cmp_with_distinct; - } /* - Create a tree for sorting. 
The tree is used to sort and to remove - duplicate values (according to the syntax of this function). If there - is no DISTINCT or ORDER BY clauses, we don't create this tree. + Create a tree for sorting. The tree is used to sort (according to the + syntax of this function). If there is no ORDER BY clause, we don't + create this tree. */ init_tree(tree, (uint) min(thd->variables.max_heap_table_size, thd->variables.sortbuff_size/16), 0, - tree_key_length, compare_key, 0, NULL, (void*) this); + tree_key_length, + group_concat_key_cmp_with_order , 0, NULL, (void*) this); } + if (distinct) + unique_filter= new Unique(group_concat_key_cmp_with_distinct, + (void*)this, + tree_key_length, + thd->variables.max_heap_table_size); + DBUG_RETURN(FALSE); } @@ -3475,3 +3475,10 @@ void Item_func_group_concat::print(String *str) str->append(*separator); str->append(STRING_WITH_LEN("\')")); } + + +Item_func_group_concat::~Item_func_group_concat() +{ + if (unique_filter) + delete unique_filter; +} diff --git a/sql/item_sum.h b/sql/item_sum.h index b3a382012f1..a3582967736 100644 --- a/sql/item_sum.h +++ b/sql/item_sum.h @@ -1173,11 +1173,22 @@ class Item_func_group_concat : public Item_sum String *separator; TREE tree_base; TREE *tree; + + /** + If DISTINCT is used with this GROUP_CONCAT, this member is used to filter + out duplicates. + @see Item_func_group_concat::setup + @see Item_func_group_concat::add + @see Item_func_group_concat::clear + */ + Unique *unique_filter; TABLE *table; ORDER **order; Name_resolution_context *context; - uint arg_count_order; // total count of ORDER BY items - uint arg_count_field; // count of arguments + /** The number of ORDER BY items. */ + uint arg_count_order; + /** The number of selected items, aka the expr list. 
*/ + uint arg_count_field; uint count_cut_values; bool distinct; bool warning_for_row; @@ -1190,13 +1201,10 @@ class Item_func_group_concat : public Item_sum */ Item_func_group_concat *original; - friend int group_concat_key_cmp_with_distinct(void* arg, uchar* key1, - uchar* key2); - friend int group_concat_key_cmp_with_order(void* arg, uchar* key1, - uchar* key2); - friend int group_concat_key_cmp_with_distinct_and_order(void* arg, - uchar* key1, - uchar* key2); + friend int group_concat_key_cmp_with_distinct(void* arg, const void* key1, + const void* key2); + friend int group_concat_key_cmp_with_order(void* arg, const void* key1, + const void* key2); friend int dump_leaf_key(uchar* key, element_count count __attribute__((unused)), Item_func_group_concat *group_concat_item); @@ -1207,7 +1215,7 @@ public: SQL_LIST *is_order, String *is_separator); Item_func_group_concat(THD *thd, Item_func_group_concat *item); - ~Item_func_group_concat() {} + ~Item_func_group_concat(); void cleanup(); enum Sumfunctype sum_func () const {return GROUP_CONCAT_FUNC;} diff --git a/sql/item_timefunc.cc b/sql/item_timefunc.cc index e7d513e9d6a..56a6480d859 100644 --- a/sql/item_timefunc.cc +++ b/sql/item_timefunc.cc @@ -3241,38 +3241,42 @@ get_date_time_result_type(const char *format, uint length) void Item_func_str_to_date::fix_length_and_dec() { - char format_buff[64]; - String format_str(format_buff, sizeof(format_buff), &my_charset_bin), *format; maybe_null= 1; decimals=0; cached_field_type= MYSQL_TYPE_DATETIME; max_length= MAX_DATETIME_FULL_WIDTH*MY_CHARSET_BIN_MB_MAXLEN; cached_timestamp_type= MYSQL_TIMESTAMP_NONE; - format= args[1]->val_str(&format_str); - if (!args[1]->null_value && (const_item= args[1]->const_item())) + if ((const_item= args[1]->const_item())) { - cached_format_type= get_date_time_result_type(format->ptr(), - format->length()); - switch (cached_format_type) { - case DATE_ONLY: - cached_timestamp_type= MYSQL_TIMESTAMP_DATE; - cached_field_type= MYSQL_TYPE_DATE; - 
max_length= MAX_DATE_WIDTH*MY_CHARSET_BIN_MB_MAXLEN; - break; - case TIME_ONLY: - case TIME_MICROSECOND: - cached_timestamp_type= MYSQL_TIMESTAMP_TIME; - cached_field_type= MYSQL_TYPE_TIME; - max_length= MAX_TIME_WIDTH*MY_CHARSET_BIN_MB_MAXLEN; - break; - default: - cached_timestamp_type= MYSQL_TIMESTAMP_DATETIME; - cached_field_type= MYSQL_TYPE_DATETIME; - break; + char format_buff[64]; + String format_str(format_buff, sizeof(format_buff), &my_charset_bin); + String *format= args[1]->val_str(&format_str); + if (!args[1]->null_value) + { + cached_format_type= get_date_time_result_type(format->ptr(), + format->length()); + switch (cached_format_type) { + case DATE_ONLY: + cached_timestamp_type= MYSQL_TIMESTAMP_DATE; + cached_field_type= MYSQL_TYPE_DATE; + max_length= MAX_DATE_WIDTH * MY_CHARSET_BIN_MB_MAXLEN; + break; + case TIME_ONLY: + case TIME_MICROSECOND: + cached_timestamp_type= MYSQL_TIMESTAMP_TIME; + cached_field_type= MYSQL_TYPE_TIME; + max_length= MAX_TIME_WIDTH * MY_CHARSET_BIN_MB_MAXLEN; + break; + default: + cached_timestamp_type= MYSQL_TIMESTAMP_DATETIME; + cached_field_type= MYSQL_TYPE_DATETIME; + break; + } } } } + bool Item_func_str_to_date::get_date(MYSQL_TIME *ltime, uint fuzzy_date) { DATE_TIME_FORMAT date_time_format; @@ -3340,6 +3344,8 @@ bool Item_func_last_day::get_date(MYSQL_TIME *ltime, uint fuzzy_date) ltime->day= days_in_month[month_idx]; if ( month_idx == 1 && calc_days_in_year(ltime->year) == 366) ltime->day= 29; + ltime->hour= ltime->minute= ltime->second= 0; + ltime->second_part= 0; ltime->time_type= MYSQL_TIMESTAMP_DATE; return 0; } diff --git a/sql/item_xmlfunc.cc b/sql/item_xmlfunc.cc index 1a6c15a4d2e..68d85418324 100644 --- a/sql/item_xmlfunc.cc +++ b/sql/item_xmlfunc.cc @@ -2612,35 +2612,27 @@ typedef struct uint level; String *pxml; // parsed XML uint pos[MAX_LEVEL]; // Tag position stack + uint parent; // Offset of the parent of the current node } MY_XML_USER_DATA; -/* - Find the parent node - - SYNOPSYS - Find the parent 
node, i.e. a tag or attrubute node on the given level. - - RETURN - 1 - success - 0 - failure -*/ -static uint xml_parent_tag(MY_XML_NODE *items, uint nitems, uint level) +static bool +append_node(String *str, MY_XML_NODE *node) { - if (!nitems) - return 0; - - MY_XML_NODE *p, *last= &items[nitems-1]; - for (p= last; p >= items; p--) - { - if (p->level == level && - (p->type == MY_XML_NODE_TAG || - p->type == MY_XML_NODE_ATTR)) - { - return p - items; - } - } - return 0; + /* + If "str" doesn't have space for a new node, + it will allocate two times more space than it has had so far. + (2*len+512) is a heuristic value, + which gave the best performance during tests. + The ideas behind this formula are: + - It allows to have a very small number of reallocs: + about 10 reallocs on a 1Mb-long XML value. + - At the same time, it avoids excessive memory use. + */ + if (str->reserve(sizeof(MY_XML_NODE), 2 * str->length() + 512)) + return TRUE; + str->q_append((const char*) node, sizeof(MY_XML_NODE)); + return FALSE; } @@ -2662,19 +2654,17 @@ extern "C" int xml_enter(MY_XML_PARSER *st,const char *attr, size_t len); int xml_enter(MY_XML_PARSER *st,const char *attr, size_t len) { MY_XML_USER_DATA *data= (MY_XML_USER_DATA*)st->user_data; - MY_XML_NODE *nodes= (MY_XML_NODE*) data->pxml->ptr(); uint numnodes= data->pxml->length() / sizeof(MY_XML_NODE); - uint parent= xml_parent_tag(nodes, numnodes, data->level - 1); MY_XML_NODE node; + node.parent= data->parent; // Set parent for the new node to old parent + data->parent= numnodes; // Remember current node as new parent data->pos[data->level]= numnodes; node.level= data->level++; node.type= st->current_node_type; // TAG or ATTR node.beg= attr; node.end= attr + len; - node.parent= parent; - data->pxml->append((const char*) &node, sizeof(MY_XML_NODE)); - return MY_XML_OK; + return append_node(data->pxml, &node) ? 
MY_XML_ERROR : MY_XML_OK; } @@ -2695,18 +2685,14 @@ extern "C" int xml_value(MY_XML_PARSER *st,const char *attr, size_t len); int xml_value(MY_XML_PARSER *st,const char *attr, size_t len) { MY_XML_USER_DATA *data= (MY_XML_USER_DATA*)st->user_data; - MY_XML_NODE *nodes= (MY_XML_NODE*) data->pxml->ptr(); - uint numnodes= data->pxml->length() / sizeof(MY_XML_NODE); - uint parent= xml_parent_tag(nodes, numnodes, data->level - 1); MY_XML_NODE node; + node.parent= data->parent; // Set parent for the new text node to old parent node.level= data->level; node.type= MY_XML_NODE_TEXT; node.beg= attr; node.end= attr + len; - node.parent= parent; - data->pxml->append((const char*) &node, sizeof(MY_XML_NODE)); - return MY_XML_OK; + return append_node(data->pxml, &node) ? MY_XML_ERROR : MY_XML_OK; } @@ -2731,6 +2717,7 @@ int xml_leave(MY_XML_PARSER *st,const char *attr, size_t len) data->level--; MY_XML_NODE *nodes= (MY_XML_NODE*) data->pxml->ptr(); + data->parent= nodes[data->parent].parent; nodes+= data->pos[data->level]; nodes->tagend= st->cur; @@ -2761,6 +2748,7 @@ String *Item_xml_str_func::parse_xml(String *raw_xml, String *parsed_xml_buf) p.flags= MY_XML_FLAG_RELATIVE_NAMES | MY_XML_FLAG_SKIP_TEXT_NORMALIZATION; user_data.level= 0; user_data.pxml= parsed_xml_buf; + user_data.parent= 0; my_xml_set_enter_handler(&p, xml_enter); my_xml_set_value_handler(&p, xml_value); my_xml_set_leave_handler(&p, xml_leave); diff --git a/sql/item_xmlfunc.h b/sql/item_xmlfunc.h index 278c98baf7c..dadbb5ccf42 100644 --- a/sql/item_xmlfunc.h +++ b/sql/item_xmlfunc.h @@ -28,8 +28,16 @@ protected: String tmp_value, pxml; Item *nodeset_func; public: - Item_xml_str_func(Item *a, Item *b): Item_str_func(a,b) {} - Item_xml_str_func(Item *a, Item *b, Item *c): Item_str_func(a,b,c) {} + Item_xml_str_func(Item *a, Item *b): + Item_str_func(a,b) + { + maybe_null= TRUE; + } + Item_xml_str_func(Item *a, Item *b, Item *c): + Item_str_func(a,b,c) + { + maybe_null= TRUE; + } void fix_length_and_dec(); String 
*parse_xml(String *raw_xml, String *parsed_xml_buf); }; diff --git a/sql/lock.cc b/sql/lock.cc index 29a07858bc1..ef4a0cc3d83 100644 --- a/sql/lock.cc +++ b/sql/lock.cc @@ -847,9 +847,6 @@ static MYSQL_LOCK *get_lock_data(THD *thd, TABLE **table_ptr, uint count, locks= locks_buf= sql_lock->locks= (THR_LOCK_DATA**) (sql_lock + 1); to= table_buf= sql_lock->table= (TABLE**) (locks + tables * 2); sql_lock->table_count=lock_count; - sql_lock->lock_count=tables; - DBUG_PRINT("info", ("sql_lock->table_count %d sql_lock->lock_count %d", - sql_lock->table_count, sql_lock->lock_count)); for (i=0 ; i < count ; i++) { @@ -889,6 +886,23 @@ static MYSQL_LOCK *get_lock_data(THD *thd, TABLE **table_ptr, uint count, for ( ; org_locks != locks ; org_locks++) (*org_locks)->debug_print_param= (void *) table; } + /* + We do not use 'tables', because there are cases where store_lock() + returns less locks than lock_count() claimed. This can happen when + a FLUSH TABLES tries to abort locks from a MERGE table of another + thread. When that thread has just opened the table, but not yet + attached its children, it cannot return the locks. lock_count() + always returns the number of locks that an attached table has. + This is done to avoid the reverse situation: If lock_count() would + return 0 for a non-attached MERGE table, and that table becomes + attached between the calls to lock_count() and store_lock(), then + we would have allocated too little memory for the lock data. Now + we may allocate too much, but better safe than memory overrun. + And in the FLUSH case, the memory is released quickly anyway. 
+ */ + sql_lock->lock_count= locks - locks_buf; + DBUG_PRINT("info", ("sql_lock->table_count %d sql_lock->lock_count %d", + sql_lock->table_count, sql_lock->lock_count)); DBUG_RETURN(sql_lock); } diff --git a/sql/log.cc b/sql/log.cc index 688ed03d5d1..05cae07d930 100644 --- a/sql/log.cc +++ b/sql/log.cc @@ -2158,13 +2158,9 @@ const char *MYSQL_LOG::generate_name(const char *log_name, { if (!log_name || !log_name[0]) { - /* - TODO: The following should be using fn_format(); We just need to - first change fn_format() to cut the file name if it's too long. - */ - strmake(buff, pidfile_name, FN_REFLEN - 5); - strmov(fn_ext(buff), suffix); - return (const char *)buff; + strmake(buff, pidfile_name, FN_REFLEN - strlen(suffix) - 1); + return (const char *) + fn_format(buff, buff, "", suffix, MYF(MY_REPLACE_EXT|MY_REPLACE_DIR)); } // get rid of extension if the log is binary to avoid problems if (strip_ext) @@ -2998,10 +2994,10 @@ err: void MYSQL_BIN_LOG::make_log_name(char* buf, const char* log_ident) { uint dir_len = dirname_length(log_file_name); - if (dir_len > FN_REFLEN) + if (dir_len >= FN_REFLEN) dir_len=FN_REFLEN-1; strnmov(buf, log_file_name, dir_len); - strmake(buf+dir_len, log_ident, FN_REFLEN - dir_len); + strmake(buf+dir_len, log_ident, FN_REFLEN - dir_len -1); } @@ -3569,9 +3565,6 @@ bool MYSQL_BIN_LOG::write(Log_event *event_info) (!binlog_filter->db_ok(local_db))) { VOID(pthread_mutex_unlock(&LOCK_log)); - DBUG_PRINT("info",("OPTION_BIN_LOG is %s, db_ok('%s') == %d", - (thd->options & OPTION_BIN_LOG) ? 
"set" : "clear", - local_db, binlog_filter->db_ok(local_db))); DBUG_RETURN(0); } #endif /* HAVE_REPLICATION */ diff --git a/sql/log.h b/sql/log.h index bef0101c8b5..20a1b7e8e6d 100644 --- a/sql/log.h +++ b/sql/log.h @@ -130,7 +130,13 @@ typedef struct st_log_info my_off_t pos; bool fatal; // if the purge happens to give us a negative offset pthread_mutex_t lock; - st_log_info():fatal(0) { pthread_mutex_init(&lock, MY_MUTEX_INIT_FAST);} + st_log_info() + : index_file_offset(0), index_file_start_offset(0), + pos(0), fatal(0) + { + log_file_name[0] = '\0'; + pthread_mutex_init(&lock, MY_MUTEX_INIT_FAST); + } ~st_log_info() { pthread_mutex_destroy(&lock);} } LOG_INFO; diff --git a/sql/log_event.cc b/sql/log_event.cc index a6d07e72033..2b3037aedcc 100644 --- a/sql/log_event.cc +++ b/sql/log_event.cc @@ -36,6 +36,64 @@ #define FLAGSTR(V,F) ((V)&(F)?#F" ":"") +#if !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION) && !defined(DBUG_OFF) && !defined(_lint) +static const char *HA_ERR(int i) +{ + switch (i) { + case HA_ERR_KEY_NOT_FOUND: return "HA_ERR_KEY_NOT_FOUND"; + case HA_ERR_FOUND_DUPP_KEY: return "HA_ERR_FOUND_DUPP_KEY"; + case HA_ERR_RECORD_CHANGED: return "HA_ERR_RECORD_CHANGED"; + case HA_ERR_WRONG_INDEX: return "HA_ERR_WRONG_INDEX"; + case HA_ERR_CRASHED: return "HA_ERR_CRASHED"; + case HA_ERR_WRONG_IN_RECORD: return "HA_ERR_WRONG_IN_RECORD"; + case HA_ERR_OUT_OF_MEM: return "HA_ERR_OUT_OF_MEM"; + case HA_ERR_NOT_A_TABLE: return "HA_ERR_NOT_A_TABLE"; + case HA_ERR_WRONG_COMMAND: return "HA_ERR_WRONG_COMMAND"; + case HA_ERR_OLD_FILE: return "HA_ERR_OLD_FILE"; + case HA_ERR_NO_ACTIVE_RECORD: return "HA_ERR_NO_ACTIVE_RECORD"; + case HA_ERR_RECORD_DELETED: return "HA_ERR_RECORD_DELETED"; + case HA_ERR_RECORD_FILE_FULL: return "HA_ERR_RECORD_FILE_FULL"; + case HA_ERR_INDEX_FILE_FULL: return "HA_ERR_INDEX_FILE_FULL"; + case HA_ERR_END_OF_FILE: return "HA_ERR_END_OF_FILE"; + case HA_ERR_UNSUPPORTED: return "HA_ERR_UNSUPPORTED"; + case HA_ERR_TO_BIG_ROW: return 
"HA_ERR_TO_BIG_ROW"; + case HA_WRONG_CREATE_OPTION: return "HA_WRONG_CREATE_OPTION"; + case HA_ERR_FOUND_DUPP_UNIQUE: return "HA_ERR_FOUND_DUPP_UNIQUE"; + case HA_ERR_UNKNOWN_CHARSET: return "HA_ERR_UNKNOWN_CHARSET"; + case HA_ERR_WRONG_MRG_TABLE_DEF: return "HA_ERR_WRONG_MRG_TABLE_DEF"; + case HA_ERR_CRASHED_ON_REPAIR: return "HA_ERR_CRASHED_ON_REPAIR"; + case HA_ERR_CRASHED_ON_USAGE: return "HA_ERR_CRASHED_ON_USAGE"; + case HA_ERR_LOCK_WAIT_TIMEOUT: return "HA_ERR_LOCK_WAIT_TIMEOUT"; + case HA_ERR_LOCK_TABLE_FULL: return "HA_ERR_LOCK_TABLE_FULL"; + case HA_ERR_READ_ONLY_TRANSACTION: return "HA_ERR_READ_ONLY_TRANSACTION"; + case HA_ERR_LOCK_DEADLOCK: return "HA_ERR_LOCK_DEADLOCK"; + case HA_ERR_CANNOT_ADD_FOREIGN: return "HA_ERR_CANNOT_ADD_FOREIGN"; + case HA_ERR_NO_REFERENCED_ROW: return "HA_ERR_NO_REFERENCED_ROW"; + case HA_ERR_ROW_IS_REFERENCED: return "HA_ERR_ROW_IS_REFERENCED"; + case HA_ERR_NO_SAVEPOINT: return "HA_ERR_NO_SAVEPOINT"; + case HA_ERR_NON_UNIQUE_BLOCK_SIZE: return "HA_ERR_NON_UNIQUE_BLOCK_SIZE"; + case HA_ERR_NO_SUCH_TABLE: return "HA_ERR_NO_SUCH_TABLE"; + case HA_ERR_TABLE_EXIST: return "HA_ERR_TABLE_EXIST"; + case HA_ERR_NO_CONNECTION: return "HA_ERR_NO_CONNECTION"; + case HA_ERR_NULL_IN_SPATIAL: return "HA_ERR_NULL_IN_SPATIAL"; + case HA_ERR_TABLE_DEF_CHANGED: return "HA_ERR_TABLE_DEF_CHANGED"; + case HA_ERR_NO_PARTITION_FOUND: return "HA_ERR_NO_PARTITION_FOUND"; + case HA_ERR_RBR_LOGGING_FAILED: return "HA_ERR_RBR_LOGGING_FAILED"; + case HA_ERR_DROP_INDEX_FK: return "HA_ERR_DROP_INDEX_FK"; + case HA_ERR_FOREIGN_DUPLICATE_KEY: return "HA_ERR_FOREIGN_DUPLICATE_KEY"; + case HA_ERR_TABLE_NEEDS_UPGRADE: return "HA_ERR_TABLE_NEEDS_UPGRADE"; + case HA_ERR_TABLE_READONLY: return "HA_ERR_TABLE_READONLY"; + case HA_ERR_AUTOINC_READ_FAILED: return "HA_ERR_AUTOINC_READ_FAILED"; + case HA_ERR_AUTOINC_ERANGE: return "HA_ERR_AUTOINC_ERANGE"; + case HA_ERR_GENERIC: return "HA_ERR_GENERIC"; + case HA_ERR_RECORD_IS_THE_SAME: return 
"HA_ERR_RECORD_IS_THE_SAME"; + case HA_ERR_LOGGING_IMPOSSIBLE: return "HA_ERR_LOGGING_IMPOSSIBLE"; + case HA_ERR_CORRUPT_EVENT: return "HA_ERR_CORRUPT_EVENT"; + } + return "<unknown error>"; +} +#endif + /* Cache that will automatically be written to a dedicated file on destruction. @@ -114,6 +172,9 @@ private: flag_set m_flags; }; +#ifndef DBUG_OFF +uint debug_not_change_ts_if_art_event= 1; // bug#29309 simulation +#endif /* pretty_print_str() @@ -555,8 +616,32 @@ int Log_event::do_update_pos(Relay_log_info *rli) Matz: I don't think we will need this check with this refactoring. */ if (rli) - rli->stmt_done(log_pos, when); - + { + /* + bug#29309 simulation: resetting the flag to force + wrong behaviour of artificial event to update + rli->last_master_timestamp for only one time - + the first FLUSH LOGS in the test. + */ + DBUG_EXECUTE_IF("let_first_flush_log_change_timestamp", + if (debug_not_change_ts_if_art_event == 1 + && is_artificial_event()) + { + debug_not_change_ts_if_art_event= 0; + }); +#ifndef DBUG_OFF + rli->stmt_done(log_pos, + is_artificial_event() && + debug_not_change_ts_if_art_event > 0 ? 0 : when); +#else + rli->stmt_done(log_pos, is_artificial_event()? 
0 : when); +#endif + DBUG_EXECUTE_IF("let_first_flush_log_change_timestamp", + if (debug_not_change_ts_if_art_event == 0) + { + debug_not_change_ts_if_art_event= 2; + }); + } return 0; // Cannot fail currently } @@ -570,7 +655,8 @@ Log_event::do_shall_skip(Relay_log_info *rli) (ulong) server_id, (ulong) ::server_id, rli->replicate_same_server_id, rli->slave_skip_counter)); - if (server_id == ::server_id && !rli->replicate_same_server_id) + if (server_id == ::server_id && !rli->replicate_same_server_id || + rli->slave_skip_counter == 1 && rli->is_in_group()) return EVENT_SKIP_IGNORE; else if (rli->slave_skip_counter > 0) return EVENT_SKIP_COUNT; @@ -1227,6 +1313,16 @@ void Log_event::print_timestamp(IO_CACHE* file, time_t* ts) #endif /* MYSQL_CLIENT */ +#if !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION) +inline Log_event::enum_skip_reason +Log_event::continue_group(Relay_log_info *rli) +{ + if (rli->slave_skip_counter == 1) + return Log_event::EVENT_SKIP_IGNORE; + return Log_event::do_shall_skip(rli); +} +#endif + /************************************************************************** Query_log_event methods **************************************************************************/ @@ -1290,6 +1386,11 @@ static void write_str_with_code_and_len(char **dst, const char *src, bool Query_log_event::write(IO_CACHE* file) { + /** + @todo if catalog can be of length FN_REFLEN==512, then we are not + replicating it correctly, since the length is stored in a byte + /sven + */ uchar buf[QUERY_HEADER_LEN+ 1+4+ // code of flags2 and flags2 1+8+ // code of sql_mode and sql_mode @@ -1516,6 +1617,10 @@ Query_log_event::Query_log_event(THD* thd_arg, const char* query_arg, time(&end_time); exec_time = (ulong) (end_time - thd_arg->start_time); + /** + @todo this means that if we have no catalog, then it is replicated + as an existing catalog of length zero. is that safe? /sven + */ catalog_len = (catalog) ? 
(uint32) strlen(catalog) : 0; /* status_vars_len is set just before writing the event */ db_len = (db) ? (uint32) strlen(db) : 0; @@ -1525,7 +1630,7 @@ Query_log_event::Query_log_event(THD* thd_arg, const char* query_arg, /* If we don't use flags2 for anything else than options contained in thd_arg->options, it would be more efficient to flags2=thd_arg->options - (OPTIONS_WRITTEN_TO_BINLOG would be used only at reading time). + (OPTIONS_WRITTEN_TO_BIN_LOG would be used only at reading time). But it's likely that we don't want to use 32 bits for 3 bits; in the future we will probably want to reclaim the 29 bits. So we need the &. */ @@ -1556,18 +1661,48 @@ Query_log_event::Query_log_event(THD* thd_arg, const char* query_arg, /* 2 utility functions for the next method */ -/* - Get the pointer for a string (src) that contains the length in - the first byte. Set the output string (dst) to the string value - and place the length of the string in the byte after the string. +/** + Read a string with length from memory. + + This function reads the string-with-length stored at + <code>src</code> and extract the length into <code>*len</code> and + a pointer to the start of the string into <code>*dst</code>. The + string can then be copied using <code>memcpy()</code> with the + number of bytes given in <code>*len</code>. + + @param src Pointer to variable holding a pointer to the memory to + read the string from. + @param dst Pointer to variable holding a pointer where the actual + string starts. Starting from this position, the string + can be copied using @c memcpy(). + @param len Pointer to variable where the length will be stored. + @param end One-past-the-end of the memory where the string is + stored. 
+ + @return Zero if the entire string can be copied successfully, + @c UINT_MAX if the length could not be read from memory + (that is, if <code>*src >= end</code>), otherwise the + number of bytes that are missing to read the full + string, which happens when <code>*dst + *len >= end</code>. */ -static void get_str_len_and_pointer(const Log_event::Byte **src, - const char **dst, - uint *len) -{ - if ((*len= **src)) - *dst= (char *)*src + 1; // Will be copied later - (*src)+= *len + 1; +static int +get_str_len_and_pointer(const Log_event::Byte **src, + const char **dst, + uint *len, + const Log_event::Byte *end) +{ + if (*src >= end) + return -1; // Will be UINT_MAX in two's-complement arithmetic + uint length= **src; + if (length > 0) + { + if (*src + length >= end) + return *src + length - end + 1; // Number of bytes missing + *dst= (char *)*src + 1; // Will be copied later + } + *len= length; + *src+= length + 1; + return 0; } static void copy_str_and_move(const char **src, @@ -1580,6 +1715,46 @@ static void copy_str_and_move(const char **src, *(*dst)++= 0; } + +#ifndef DBUG_OFF +static char const * +code_name(int code) +{ + static char buf[255]; + switch (code) { + case Q_FLAGS2_CODE: return "Q_FLAGS2_CODE"; + case Q_SQL_MODE_CODE: return "Q_SQL_MODE_CODE"; + case Q_CATALOG_CODE: return "Q_CATALOG_CODE"; + case Q_AUTO_INCREMENT: return "Q_AUTO_INCREMENT"; + case Q_CHARSET_CODE: return "Q_CHARSET_CODE"; + case Q_TIME_ZONE_CODE: return "Q_TIME_ZONE_CODE"; + case Q_CATALOG_NZ_CODE: return "Q_CATALOG_NZ_CODE"; + case Q_LC_TIME_NAMES_CODE: return "Q_LC_TIME_NAMES_CODE"; + case Q_CHARSET_DATABASE_CODE: return "Q_CHARSET_DATABASE_CODE"; + } + sprintf(buf, "CODE#%d", code); + return buf; +} +#endif + +/** + Macro to check that there is enough space to read from memory. + + @param PTR Pointer to memory + @param END End of memory + @param CNT Number of bytes that should be read. 
+ */ +#define CHECK_SPACE(PTR,END,CNT) \ + do { \ + DBUG_PRINT("info", ("Read %s", code_name(pos[-1]))); \ + DBUG_ASSERT((PTR) + (CNT) <= (END)); \ + if ((PTR) + (CNT) > (END)) { \ + DBUG_PRINT("info", ("query= 0")); \ + query= 0; \ + DBUG_VOID_RETURN; \ + } \ + } while (0) + /* Query_log_event::Query_log_event() This is used by the SQL slave thread to prepare the event before execution. @@ -1632,6 +1807,19 @@ Query_log_event::Query_log_event(const char* buf, uint event_len, if (tmp) { status_vars_len= uint2korr(buf + Q_STATUS_VARS_LEN_OFFSET); + /* + Check if status variable length is corrupt and will lead to very + wrong data. We could be even more strict and require data_len to + be even bigger, but this will suffice to catch most corruption + errors that can lead to a crash. + */ + if (status_vars_len > min(data_len, MAX_SIZE_LOG_EVENT_STATUS)) + { + DBUG_PRINT("info", ("status_vars_len (%u) > data_len (%lu); query= 0", + status_vars_len, data_len)); + query= 0; + DBUG_VOID_RETURN; + } data_len-= status_vars_len; DBUG_PRINT("info", ("Query_log_event has status_vars_len: %u", (uint) status_vars_len)); @@ -1651,6 +1839,7 @@ Query_log_event::Query_log_event(const char* buf, uint event_len, { switch (*pos++) { case Q_FLAGS2_CODE: + CHECK_SPACE(pos, end, 4); flags2_inited= 1; flags2= uint4korr(pos); DBUG_PRINT("info",("In Query_log_event, read flags2: %lu", (ulong) flags2)); @@ -1661,6 +1850,7 @@ Query_log_event::Query_log_event(const char* buf, uint event_len, #ifndef DBUG_OFF char buff[22]; #endif + CHECK_SPACE(pos, end, 8); sql_mode_inited= 1; sql_mode= (ulong) uint8korr(pos); // QQ: Fix when sql_mode is ulonglong DBUG_PRINT("info",("In Query_log_event, read sql_mode: %s", @@ -1669,15 +1859,24 @@ Query_log_event::Query_log_event(const char* buf, uint event_len, break; } case Q_CATALOG_NZ_CODE: - get_str_len_and_pointer(&pos, &catalog, &catalog_len); + DBUG_PRINT("info", ("case Q_CATALOG_NZ_CODE; pos: 0x%lx; end: 0x%lx", + (ulong) pos, (ulong) end)); + if 
(get_str_len_and_pointer(&pos, &catalog, &catalog_len, end)) + { + DBUG_PRINT("info", ("query= 0")); + query= 0; + DBUG_VOID_RETURN; + } break; case Q_AUTO_INCREMENT: + CHECK_SPACE(pos, end, 4); auto_increment_increment= uint2korr(pos); auto_increment_offset= uint2korr(pos+2); pos+= 4; break; case Q_CHARSET_CODE: { + CHECK_SPACE(pos, end, 6); charset_inited= 1; memcpy(charset, pos, 6); pos+= 6; @@ -1685,20 +1884,29 @@ Query_log_event::Query_log_event(const char* buf, uint event_len, } case Q_TIME_ZONE_CODE: { - get_str_len_and_pointer(&pos, &time_zone_str, &time_zone_len); + if (get_str_len_and_pointer(&pos, &time_zone_str, &time_zone_len, end)) + { + DBUG_PRINT("info", ("Q_TIME_ZONE_CODE: query= 0")); + query= 0; + DBUG_VOID_RETURN; + } break; } case Q_CATALOG_CODE: /* for 5.0.x where 0<=x<=3 masters */ + CHECK_SPACE(pos, end, 1); if ((catalog_len= *pos)) catalog= (char*) pos+1; // Will be copied later + CHECK_SPACE(pos, end, catalog_len + 2); pos+= catalog_len+2; // leap over end 0 catalog_nz= 0; // catalog has end 0 in event break; case Q_LC_TIME_NAMES_CODE: + CHECK_SPACE(pos, end, 2); lc_time_names_number= uint2korr(pos); pos+= 2; break; case Q_CHARSET_DATABASE_CODE: + CHECK_SPACE(pos, end, 2); charset_database_number= uint2korr(pos); pos+= 2; break; @@ -1726,6 +1934,11 @@ Query_log_event::Query_log_event(const char* buf, uint event_len, DBUG_VOID_RETURN; if (catalog_len) // If catalog is given { + /** + @todo we should clean up and do only copy_str_and_move; it + works for both cases. Then we can remove the catalog_nz + flag. /sven + */ if (likely(catalog_nz)) // true except if event comes from 5.0.0|1|2|3. copy_str_and_move(&catalog, &start, catalog_len); else @@ -1738,6 +1951,13 @@ Query_log_event::Query_log_event(const char* buf, uint event_len, if (time_zone_len) copy_str_and_move(&time_zone_str, &start, time_zone_len); + /** + if time_zone_len or catalog_len are 0, then time_zone and catalog + are uninitialized at this point. 
shouldn't they point to the + zero-length null-terminated strings we allocated space for in the + my_alloc call above? /sven + */ + /* A 2nd variable part; this is common to all versions */ memcpy((char*) start, end, data_len); // Copy db and query start[data_len]= '\0'; // End query with \0 (For safetly) @@ -2200,6 +2420,7 @@ end: */ thd->catalog= 0; thd->set_db(NULL, 0); /* will free the current database */ + DBUG_PRINT("info", ("end: query= 0")); thd->query= 0; // just to be sure thd->query_length= 0; VOID(pthread_mutex_unlock(&LOCK_thread_count)); @@ -2235,6 +2456,30 @@ int Query_log_event::do_update_pos(Relay_log_info *rli) } +Log_event::enum_skip_reason +Query_log_event::do_shall_skip(Relay_log_info *rli) +{ + DBUG_ENTER("Query_log_event::do_shall_skip"); + DBUG_PRINT("debug", ("query: %s; q_len: %d", query, q_len)); + DBUG_ASSERT(query && q_len > 0); + + if (rli->slave_skip_counter > 0) + { + if (strcmp("BEGIN", query) == 0) + { + thd->options|= OPTION_BEGIN; + DBUG_RETURN(Log_event::continue_group(rli)); + } + + if (strcmp("COMMIT", query) == 0 || strcmp("ROLLBACK", query) == 0) + { + thd->options&= ~OPTION_BEGIN; + DBUG_RETURN(Log_event::EVENT_SKIP_COUNT); + } + } + DBUG_RETURN(Log_event::do_shall_skip(rli)); +} + #endif @@ -2774,7 +3019,7 @@ uint Load_log_event::get_query_buffer_length() 21 + sql_ex.field_term_len*4 + 2 + // " FIELDS TERMINATED BY 'str'" 23 + sql_ex.enclosed_len*4 + 2 + // " OPTIONALLY ENCLOSED BY 'str'" 12 + sql_ex.escaped_len*4 + 2 + // " ESCAPED BY 'str'" - 21 + sql_ex.line_term_len*4 + 2 + // " FIELDS TERMINATED BY 'str'" + 21 + sql_ex.line_term_len*4 + 2 + // " LINES TERMINATED BY 'str'" 19 + sql_ex.line_start_len*4 + 2 + // " LINES STARTING BY 'str'" 15 + 22 + // " IGNORE xxx LINES" 3 + (num_fields-1)*2 + field_block_len; // " (field1, field2, ...)" @@ -3871,10 +4116,7 @@ Intvar_log_event::do_shall_skip(Relay_log_info *rli) that we do not change the value of the slave skip counter since it will be decreased by the following insert 
event. */ - if (rli->slave_skip_counter == 1) - return Log_event::EVENT_SKIP_IGNORE; - else - return Log_event::do_shall_skip(rli); + return continue_group(rli); } #endif @@ -3970,10 +4212,7 @@ Rand_log_event::do_shall_skip(Relay_log_info *rli) that we do not change the value of the slave skip counter since it will be decreased by the following insert event. */ - if (rli->slave_skip_counter == 1) - return Log_event::EVENT_SKIP_IGNORE; - else - return Log_event::do_shall_skip(rli); + return continue_group(rli); } #endif /* !MYSQL_CLIENT */ @@ -4049,6 +4288,17 @@ int Xid_log_event::do_apply_event(Relay_log_info const *rli) "COMMIT /* implicit, from Xid_log_event */"); return end_trans(thd, COMMIT); } + +Log_event::enum_skip_reason +Xid_log_event::do_shall_skip(Relay_log_info *rli) +{ + DBUG_ENTER("Xid_log_event::do_shall_skip"); + if (rli->slave_skip_counter > 0) { + thd->options&= ~OPTION_BEGIN; + DBUG_RETURN(Log_event::EVENT_SKIP_COUNT); + } + DBUG_RETURN(Log_event::do_shall_skip(rli)); +} #endif /* !MYSQL_CLIENT */ @@ -4427,10 +4677,7 @@ User_var_log_event::do_shall_skip(Relay_log_info *rli) that we do not change the value of the slave skip counter since it will be decreased by the following insert event. */ - if (rli->slave_skip_counter == 1) - return Log_event::EVENT_SKIP_IGNORE; - else - return Log_event::do_shall_skip(rli); + return continue_group(rli); } #endif /* !MYSQL_CLIENT */ @@ -5366,6 +5613,19 @@ int Begin_load_query_log_event::get_create_or_append() const #endif /* defined( HAVE_REPLICATION) && !defined(MYSQL_CLIENT) */ +#if !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION) +Log_event::enum_skip_reason +Begin_load_query_log_event::do_shall_skip(Relay_log_info *rli) +{ + /* + If the slave skip counter is 1, then we should not start executing + on the next event. 
+ */ + return continue_group(rli); +} +#endif + + /************************************************************************** Execute_load_query_log_event methods **************************************************************************/ @@ -5374,12 +5634,13 @@ int Begin_load_query_log_event::get_create_or_append() const #ifndef MYSQL_CLIENT Execute_load_query_log_event:: Execute_load_query_log_event(THD *thd_arg, const char* query_arg, - ulong query_length_arg, uint fn_pos_start_arg, - uint fn_pos_end_arg, - enum_load_dup_handling dup_handling_arg, - bool using_trans, bool suppress_use): + ulong query_length_arg, uint fn_pos_start_arg, + uint fn_pos_end_arg, + enum_load_dup_handling dup_handling_arg, + bool using_trans, bool suppress_use, + THD::killed_state killed_err_arg): Query_log_event(thd_arg, query_arg, query_length_arg, using_trans, - suppress_use), + suppress_use, killed_err_arg), file_id(thd_arg->file_id), fn_pos_start(fn_pos_start_arg), fn_pos_end(fn_pos_end_arg), dup_handling(dup_handling_arg) { @@ -5577,6 +5838,10 @@ bool sql_ex_info::write_data(IO_CACHE* file) } else { + /** + @todo This is sensitive to field padding. We should write a + char[7], not an old_sql_ex. /sven + */ old_sql_ex old_ex; old_ex.field_term= *field_term; old_ex.enclosed= *enclosed; @@ -6146,14 +6411,19 @@ int Rows_log_event::do_apply_event(Relay_log_info const *rli) table->in_use = old_thd; switch (error) { - /* Some recoverable errors */ - case HA_ERR_RECORD_CHANGED: - case HA_ERR_KEY_NOT_FOUND: /* Idempotency support: OK if - tuple does not exist */ - error= 0; case 0: break; + /* Some recoverable errors */ + case HA_ERR_RECORD_CHANGED: + case HA_ERR_RECORD_DELETED: + case HA_ERR_KEY_NOT_FOUND: + case HA_ERR_END_OF_FILE: + /* Idempotency support: OK if tuple does not exist */ + DBUG_PRINT("info", ("error: %s", HA_ERR(error))); + error= 0; + break; + default: rli->report(ERROR_LEVEL, thd->net.last_errno, "Error in %s event: row application failed. 
%s", @@ -6170,6 +6440,10 @@ int Rows_log_event::do_apply_event(Relay_log_info const *rli) m_curr_row_end. */ + DBUG_PRINT("info", ("error: %d", error)); + DBUG_PRINT("info", ("curr_row: 0x%lu; curr_row_end: 0x%lu; rows_end: 0x%lu", + (ulong) m_curr_row, (ulong) m_curr_row_end, (ulong) m_rows_end)); + if (!m_curr_row_end && !error) unpack_current_row(rli); @@ -6469,6 +6743,16 @@ void Rows_log_event::print_helper(FILE *file, data) in the table map are initialized as zero (0). The array size is the same as the columns for the table on the slave. + Additionally, values saved for field metadata on the master are saved as a + string of bytes (uchar) in the binlog. A field may require 1 or more bytes + to store the information. In cases where values require multiple bytes + (e.g. values > 255), the endian-safe methods are used to properly encode + the values on the master and decode them on the slave. When the field + metadata values are captured on the slave, they are stored in an array of + type uint16. This allows the least number of casts to prevent casting bugs + when the field metadata is used in comparisons of field attributes. When + the field metadata is used for calculating addresses in pointer math, the + type used is uint32. */ /** @@ -6866,10 +7150,7 @@ Table_map_log_event::do_shall_skip(Relay_log_info *rli) If the slave skip counter is 1, then we should not start executing on the next event. */ - if (rli->slave_skip_counter == 1) - return Log_event::EVENT_SKIP_IGNORE; - else - return Log_event::do_shall_skip(rli); + return continue_group(rli); } int Table_map_log_event::do_update_pos(Relay_log_info *rli) @@ -7383,6 +7664,9 @@ static bool record_compare(TABLE *table) records. Check that the other engines also return correct records. 
*/ + DBUG_DUMP("record[0]", table->record[0], table->s->reclength); + DBUG_DUMP("record[1]", table->record[1], table->s->reclength); + bool result= FALSE; uchar saved_x[2], saved_filler[2]; @@ -7471,7 +7755,7 @@ record_compare_exit: int Rows_log_event::find_row(const Relay_log_info *rli) { - DBUG_ENTER("find_row"); + DBUG_ENTER("Rows_log_event::find_row"); DBUG_ASSERT(m_table && m_table->in_use != NULL); @@ -7700,7 +7984,7 @@ int Rows_log_event::find_row(const Relay_log_info *rli) DBUG_DUMP("record found", table->record[0], table->s->reclength); table->file->ha_rnd_end(); - DBUG_ASSERT(error == HA_ERR_END_OF_FILE || error == 0); + DBUG_ASSERT(error == HA_ERR_END_OF_FILE || error == HA_ERR_RECORD_DELETED || error == 0); DBUG_RETURN(error); } @@ -7900,7 +8184,15 @@ Update_rows_log_event::do_exec_row(const Relay_log_info *const rli) int error= find_row(rli); if (error) + { + /* + We need to read the second image in the event of error to be + able to skip to the next pair of updates + */ + m_curr_row= m_curr_row_end; + unpack_current_row(rli); return error; + } /* This is the situation after locating BI: diff --git a/sql/log_event.h b/sql/log_event.h index 0c66d1b190f..4bd496af2a4 100644 --- a/sql/log_event.h +++ b/sql/log_event.h @@ -18,8 +18,10 @@ @{ @file - - Binary log event definitions. + + @brief Binary log event definitions. This includes generic code + common to all types of log events, as well as specific code for each + type of log event. */ @@ -37,6 +39,23 @@ #include "rpl_reporting.h" #endif +/** + Either assert or return an error. + + In debug build, the condition will be checked, but in non-debug + builds, the error code given will be returned instead. 
+ + @param COND Condition to check + @param ERRNO Error number to return in non-debug builds +*/ +#ifdef DBUG_OFF +#define ASSERT_OR_RETURN_ERROR(COND, ERRNO) \ + do { if (!(COND)) return ERRNO; } while (0) +#else +#define ASSERT_OR_RETURN_ERROR(COND, ERRNO) \ + DBUG_ASSERT(COND) +#endif + #define LOG_READ_EOF -1 #define LOG_READ_BOGUS -2 #define LOG_READ_IO -3 @@ -394,15 +413,19 @@ struct sql_ex_info #define LOG_EVENT_BINLOG_IN_USE_F 0x1 -/* - If the query depends on the thread (for example: TEMPORARY TABLE). - Currently this is used by mysqlbinlog to know it must print - SET @@PSEUDO_THREAD_ID=xx; before the query (it would not hurt to print it - for every query but this would be slow). +/** + @def LOG_EVENT_THREAD_SPECIFIC_F + + If the query depends on the thread (for example: TEMPORARY TABLE). + Currently this is used by mysqlbinlog to know it must print + SET @@PSEUDO_THREAD_ID=xx; before the query (it would not hurt to print it + for every query but this would be slow). */ #define LOG_EVENT_THREAD_SPECIFIC_F 0x4 -/* +/** + @def LOG_EVENT_SUPPRESS_USE_F + Suppress the generation of 'USE' statements before the actual statement. This flag should be set for any events that does not need the current database set to function correctly. Most notable cases @@ -421,23 +444,26 @@ struct sql_ex_info */ #define LOG_EVENT_UPDATE_TABLE_MAP_VERSION_F 0x10 -/* - OPTIONS_WRITTEN_TO_BIN_LOG are the bits of thd->options which must be - written to the binlog. OPTIONS_WRITTEN_TO_BINLOG could be written - into the Format_description_log_event, so that if later we don't want - to replicate a variable we did replicate, or the contrary, it's - doable. But it should not be too hard to decide once for all of what - we replicate and what we don't, among the fixed 32 bits of - thd->options. - I (Guilhem) have read through every option's usage, and it looks like - OPTION_AUTO_IS_NULL and OPTION_NO_FOREIGN_KEYS are the only ones - which alter how the query modifies the table. 
It's good to replicate - OPTION_RELAXED_UNIQUE_CHECKS too because otherwise, the slave may - insert data slower than the master, in InnoDB. - OPTION_BIG_SELECTS is not needed (the slave thread runs with - max_join_size=HA_POS_ERROR) and OPTION_BIG_TABLES is not needed - either, as the manual says (because a too big in-memory temp table is - automatically written to disk). +/** + @def OPTIONS_WRITTEN_TO_BIN_LOG + + OPTIONS_WRITTEN_TO_BIN_LOG are the bits of thd->options which must + be written to the binlog. OPTIONS_WRITTEN_TO_BIN_LOG could be + written into the Format_description_log_event, so that if later we + don't want to replicate a variable we did replicate, or the + contrary, it's doable. But it should not be too hard to decide once + for all of what we replicate and what we don't, among the fixed 32 + bits of thd->options. + + I (Guilhem) have read through every option's usage, and it looks + like OPTION_AUTO_IS_NULL and OPTION_NO_FOREIGN_KEYS are the only + ones which alter how the query modifies the table. It's good to + replicate OPTION_RELAXED_UNIQUE_CHECKS too because otherwise, the + slave may insert data slower than the master, in InnoDB. + OPTION_BIG_SELECTS is not needed (the slave thread runs with + max_join_size=HA_POS_ERROR) and OPTION_BIG_TABLES is not needed + either, as the manual says (because a too big in-memory temp table + is automatically written to disk). */ #define OPTIONS_WRITTEN_TO_BIN_LOG \ (OPTION_AUTO_IS_NULL | OPTION_NO_FOREIGN_KEY_CHECKS | \ @@ -452,6 +478,11 @@ struct sql_ex_info #endif #undef EXPECTED_OPTIONS /* You shouldn't use this one */ +/** + @enum Log_event_type + + Enumeration type for the different types of log events. +*/ enum Log_event_type { /* @@ -612,13 +643,90 @@ typedef struct st_print_event_info #endif -/***************************************************************************** - - Log_event class +/** + @class Log_event This is the abstract base class for binary log events. 
- - ****************************************************************************/ + + @section Log_event_binary_format Binary Format + + Any Log_event saved on disk consists of the following three + components. + + @li Common-Header + @li Post-Header + @li Body + + The Common-Header, documented below, always has the same form and + length within one version of MySQL. Each event type specifies a + form and length of the Post-Header common to all events of the type. + The Body may be of different form and length even for different + events of the same type. The binary formats of Post-Header and Body + are documented separately in each subclass. The binary format of + Common-Header is as follows. + + <table> + <caption>Common-Header</caption> + + <tr> + <th>Name</th> + <th>Format<br/></th> + <th>Description</th> + </tr> + + <tr> + <td>timestamp</td> + <td>4 byte unsigned integer</td> + <td>The number of seconds since 1970. + </td> + </tr> + + <tr> + <td>type</td> + <td>1 byte enumeration</td> + <td>See enum #Log_event_type.</td> + </tr> + + <tr> + <td>master_id</td> + <td>4 byte integer</td> + <td>Server ID of the server that created the event.</td> + </tr> + + <tr> + <td>total_size</td> + <td>4 byte integer</td> + <td>The total size of this event, in bytes. In other words, this + is the sum of the sizes of Common-Header, Post-Header, and Body. + </td> + </tr> + + <tr> + <td>master_position</td> + <td>4 byte integer</td> + <td>The position of the next event in the master binary log, in + bytes from the beginning of the file. + </td> + </tr> + + <tr> + <td>flags</td> + <td>2 byte bitfield</td> + <td>See Log_event::flags.</td> + </tr> + </table> + + Summing up the numbers above, we see that the total size of the + common header is 19 bytes. + + @subsection Log_event_endianness_and_string_formats Endianness and String Formats + + All numbers, whether they are 16-, 32-, or 64-bit, are stored in + little endian, i.e., the least significant byte first. 
+ + Strings are stored in various formats. The format of each string is + documented separately. +*/ class Log_event { public: @@ -692,8 +800,8 @@ public: */ uint32 server_id; - /* - Some 16 flags. Look above for LOG_EVENT_TIME_F, + /** + Some 16 flags. See the definitions above for LOG_EVENT_TIME_F, LOG_EVENT_FORCED_ROTATE_F, LOG_EVENT_THREAD_SPECIFIC_F, and LOG_EVENT_SUPPRESS_USE_F for notes. */ @@ -871,6 +979,25 @@ public: protected: /** + Helper function to ignore an event w.r.t. the slave skip counter. + + This function can be used inside do_shall_skip() for functions + that cannot end a group. If the slave skip counter is 1 when + seeing such an event, the event shall be ignored, the counter + left intact, and processing continue with the next event. + + A typical usage is: + @code + enum_skip_reason do_shall_skip(Relay_log_info *rli) { + return continue_group(rli); + } + @endcode + + @return Skip reason + */ + enum_skip_reason continue_group(Relay_log_info *rli); + + /** Primitive to apply an event to the database. This is where the change to the database is made. @@ -950,6 +1077,7 @@ protected: #endif }; + /* One class for each type of event. Two constructors for each class: @@ -963,13 +1091,332 @@ protected: mysqlbinlog. This constructor must be format-tolerant. */ -/***************************************************************************** - - Query Log Event class - - Logs SQL queries +/** + @class Query_log_event + + Logs SQL queries. + + @section Query_log_event_binary_format Binary format + + The Post-Header has five components: + + <table> + <caption>Post-Header for Query_log_event</caption> + + <tr> + <th>Name</th> + <th>Size<br/></th> + <th>Description</th> + </tr> + + <tr> + <td>slave_proxy_id</td> + <td>4 byte unsigned integer</td> + <td>An integer identifying the client thread, which is unique on + the server. (Note, however, that two threads on different servers + may have the same slave_proxy_id.) 
This is used when a client + thread creates a temporary table. Temporary tables are local to + the client, and the slave_proxy_id is used to distinguish + temporary tables belonging to different clients. + </td> + </tr> + + <tr> + <td>exec_time</td> + <td>4 byte integer</td> + <td>???TODO</td> + </tr> + + <tr> + <td>db_len</td> + <td>1 byte integer</td> + <td>The length of the name of the currently selected + database. + </td> + </tr> + + <tr> + <td>error_code</td> + <td>2 byte integer</td> + <td>Error code generated by the master. If the master fails, the + slave will fail with the same error code, except for the error + codes ER_DB_CREATE_EXISTS==1007 and ER_DB_DROP_EXISTS==1008. + </td> + </tr> + + <tr> + <td>status_vars_len</td> + <td>2 byte integer</td> + <td>The length of the status_vars block of the Body, in bytes. See + <a href="#query_log_event_status_vars">below</a>. + </td> + </tr> + + <tr> + <td>Post-Header-For-Derived</td> + <td>0 bytes</td> + <td>This field is only written by the subclass + Execute_load_query_log_event. In this base class, it takes 0 + bytes. See separate documentation for + Execute_load_query_log_event. + </td> + </tr> + </table> + + The Body has the following components: + + <table> + <caption>Body for Query_log_event</caption> + + <tr> + <th>Name</th> + <th>Size<br/></th> + <th>Description</th> + </tr> + + <tr> + <td><a name="query_log_event_status_vars" /> status_vars</td> + <td>variable length</td> + <td>Zero or more status variables. Each status variable consists + of one byte identifying the variable stored, followed by the value + of the variable. The possible variables are listed separately in + the table below. MySQL always writes events in the order defined + below; however, it is capable of reading them in any order. + </td> + </tr> + + <tr> + <td>db</td> + <td>db_len+1</td> + <td>The currently selected database, as a null-terminated string. 
+ + (The trailing zero is redundant since the length is already known; + it is db_len from Post-Header.) + </td> + </tr> + + <tr> + <td>query</td> + <td>variable length string without trailing zero, extending to the + end of the event (determined by the length field of the + Common-Header) + </td> + <td>The SQL query.</td> + </tr> + </table> + + The following table lists the status variables that may appear in + the status_vars field. + + <table> + <caption>Status variables for Query_log_event</caption> + + <tr> + <th>Status variable</th> + <th>1-byte identifier</th> + <th>Size<br/></th> + <th>Description</th> + </tr> + + <tr> + <td>flags2</td> + <td>Q_FLAGS2_CODE == 0</td> + <td>4 byte bitfield</td> + <td>The flags in thd->options, binary AND-ed with + OPTIONS_WRITTEN_TO_BIN_LOG. The thd->options bitfield contains + options for SELECT. OPTIONS_WRITTEN identifies those options that + need to be written to the binlog (not all do). Specifically, + OPTIONS_WRITTEN_TO_BIN_LOG equals (OPTION_AUTO_IS_NULL | + OPTION_NO_FOREIGN_KEY_CHECKS | OPTION_RELAXED_UNIQUE_CHECKS | + OPTION_NOT_AUTOCOMMIT), or 0x0c084000 in hex. + + These flags correspond to the SQL variables SQL_AUTO_IS_NULL, + FOREIGN_KEY_CHECKS, UNIQUE_CHECKS, and AUTOCOMMIT, documented in + the "SET Syntax" section of the MySQL Manual. + + This field is always written to the binlog in version >= 5.0, and + never written in version < 5.0. + </td> + </tr> + + <tr> + <td>sql_mode</td> + <td>Q_SQL_MODE_CODE == 1</td> + <td>8 byte integer</td> + <td>The sql_mode variable. See the section "SQL Modes" in the + MySQL manual, and see mysql_priv.h for a list of the possible + flags. 
Currently (2007-10-04), the following flags are available: + <pre> + MODE_REAL_AS_FLOAT==0x1 + MODE_PIPES_AS_CONCAT==0x2 + MODE_ANSI_QUOTES==0x4 + MODE_IGNORE_SPACE==0x8 + MODE_NOT_USED==0x10 + MODE_ONLY_FULL_GROUP_BY==0x20 + MODE_NO_UNSIGNED_SUBTRACTION==0x40 + MODE_NO_DIR_IN_CREATE==0x80 + MODE_POSTGRESQL==0x100 + MODE_ORACLE==0x200 + MODE_MSSQL==0x400 + MODE_DB2==0x800 + MODE_MAXDB==0x1000 + MODE_NO_KEY_OPTIONS==0x2000 + MODE_NO_TABLE_OPTIONS==0x4000 + MODE_NO_FIELD_OPTIONS==0x8000 + MODE_MYSQL323==0x10000 + MODE_MYSQL323==0x20000 + MODE_MYSQL40==0x40000 + MODE_ANSI==0x80000 + MODE_NO_AUTO_VALUE_ON_ZERO==0x100000 + MODE_NO_BACKSLASH_ESCAPES==0x200000 + MODE_STRICT_TRANS_TABLES==0x400000 + MODE_STRICT_ALL_TABLES==0x800000 + MODE_NO_ZERO_IN_DATE==0x1000000 + MODE_NO_ZERO_DATE==0x2000000 + MODE_INVALID_DATES==0x4000000 + MODE_ERROR_FOR_DIVISION_BY_ZERO==0x8000000 + MODE_TRADITIONAL==0x10000000 + MODE_NO_AUTO_CREATE_USER==0x20000000 + MODE_HIGH_NOT_PRECEDENCE==0x40000000 + MODE_PAD_CHAR_TO_FULL_LENGTH==0x80000000 + </pre> + All these flags are replicated from the server. However, all + flags except MODE_NO_DIR_IN_CREATE are honored by the slave; the + slave always preserves its old value of MODE_NO_DIR_IN_CREATE. + For a rationale, see comment in Query_log_event::do_apply_event in + log_event.cc. + + This field is always written to the binlog. + </td> + </tr> + + <tr> + <td>catalog</td> + <td>Q_CATALOG_NZ_CODE == 6</td> + <td>Variable-length string: the length in bytes (1 byte) followed + by the characters (at most 255 bytes) + </td> + <td>Stores the client's current catalog. Every database belongs + to a catalog, the same way that every table belongs to a + database. Currently, there is only one catalog, 'std'. + + This field is written if the length of the catalog is > 0; + otherwise it is not written. 
+ </td> + </tr> + + <tr> + <td>auto_increment</td> + <td>Q_AUTO_INCREMENT == 3</td> + <td>two 2 byte unsigned integers, totally 2+2=4 bytes</td> + + <td>The two variables auto_increment_increment and + auto_increment_offset, in that order. For more information, see + "System variables" in the MySQL manual. + + This field is written if auto_increment>1; otherwise it is not + written. + </td> + </tr> + + <tr> + <td>charset</td> + <td>Q_CHARSET_CODE == 4</td> + <td>three 2-byte unsigned integers (i.e., 6 bytes)</td> + <td>The three variables character_set_client, + collation_connection, and collation_server, in that order. + `character_set_client' is a code identifying the character set and + collation used by the client to encode the query. + `collation_connection' identifies the character set and collation + that the master converts the query to when it receives it; this is + useful when comparing literal strings. `collation_server' is the + default character set and collation used when a new database is + created. + + See also "Connection Character Sets and Collations" in the MySQL + 5.1 manual. + + All three variables are codes identifying a (character set, + collation) pair. To see which codes map to which pairs, run the + query "SELECT id, character_set_name, collation_name FROM + COLLATIONS". + + Cf. charset_database_number (Q_CHARSET_DATABASE_CODE) below. + + This field is always written. + </td> + </tr> + + <tr> + <td>time_zone</td> + <td>Q_TIME_ZONE_CODE == 5</td> + <td>Variable-length string: the length in bytes (1 byte) followed + by the characters (at most 255 bytes). + </td> + <td>The time_zone of the master. + + See also "System Variables" and "MySQL Server Time Zone Support" + in the MySQL manual. + + This field is written if the length of the time zone string is > + 0; otherwise, it is not written. + </td> + </tr> + + <tr> + <td>lc_time_names_number</td> + <td>Q_LC_TIME_NAMES_CODE == 7</td> + <td>2 byte integer</td> + <td>A code identifying a table of month and day names. 
The + mapping from codes to languages is defined in sql_locale.cc. + + This field is written if it is != 0, i.e., if the locale is not + en_US. + </td> + </tr> + + <tr> + <td>charset_database_number</td> + <td>Q_CHARSET_DATABASE_CODE == 8</td> + <td>2 byte integer</td> + + <td>The value of the collation_database system variable (in the + source code stored in thd->variables.collation_database), which + holds the code for a (character set, collation) pair as described + above (see Q_CHARSET_CODE). + + `collation_database' was used in old versions (???WHEN). Its + value was loaded when issuing a "use db" command and could be + changed by issuing a "SET collation_database=xxx" command. It + used to affect the "LOAD DATA INFILE" and "CREATE TABLE" commands. + + In newer versions, "CREATE TABLE" has been changed to take the + character set from the database of the created table, rather than + from the current database. This makes a difference + when creating a table in another database than the current one. + "LOAD DATA INFILE" has not yet changed to do this, but there are + plans to eventually do it, and to make collation_database + read-only. + + This field is written if it is not 0. + </td> + </tr> + </table> + + @subsection Query_log_event_notes_on_previous_versions Notes on Previous Versions + + @li Status vars were introduced in version 5.0. To read earlier + versions correctly, check the length of the Post-Header. + + @li The status variable Q_CATALOG_CODE == 2 existed in MySQL 5.0.x, + where 0<=x<=3. It was identical to Q_CATALOG_NZ_CODE, except that the + string had a trailing '\0'. The '\0' was removed in 5.0.4 since it + was redundant (the string length is stored before the string). The + Q_CATALOG_CODE will never be written by a new master, but can still + be understood by a new slave. + + @li See charset_database_number (Q_CHARSET_DATABASE_CODE) in the table above. 
- ****************************************************************************/ +*/ class Query_log_event: public Log_event { protected: @@ -1027,7 +1474,7 @@ public: /* 'flags2' is a second set of flags (on top of those in Log_event), for session variables. These are thd->options which is & against a mask - (OPTIONS_WRITTEN_TO_BINLOG). + (OPTIONS_WRITTEN_TO_BIN_LOG). flags2_inited helps make a difference between flags2==0 (3.23 or 4.x master, we don't know flags2, so use the slave server's global options) and flags2==0 (5.0 master, we know this has a meaning of flags all down which @@ -1086,6 +1533,7 @@ public: public: /* !!! Public in this patch to allow old usage */ #if !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION) + virtual enum_skip_reason do_shall_skip(Relay_log_info *rli); virtual int do_apply_event(Relay_log_info const *rli); virtual int do_update_pos(Relay_log_info *rli); @@ -1096,13 +1544,16 @@ public: /* !!! Public in this patch to allow old usage */ }; -/***************************************************************************** +/** + @class Muted_query_log_event - Muted Query Log Event class + Pretends to log SQL queries, but doesn't actually do so. - Pretends to Log SQL queries, but doesn't actually do so. + @section Muted_query_log_event_binary_format Binary Format - ****************************************************************************/ + This log event is not stored, and thus the binary format is 0 bytes + long. Note that not even the Common-Header is stored. +*/ class Muted_query_log_event: public Query_log_event { public: @@ -1119,14 +1570,54 @@ public: #ifdef HAVE_REPLICATION -/***************************************************************************** +/** + @class Slave_log_event - Slave Log Event class Note that this class is currently not used at all; no code writes a - Slave_log_event (though some code in repl_failsafe.cc reads Slave_log_event). - So it's not a problem if this code is not maintained. 
- - ****************************************************************************/ + Slave_log_event (though some code in repl_failsafe.cc reads + Slave_log_event). So it's not a problem if this code is not + maintained. + + @section Slave_log_event_binary_format Binary Format + + This event type has no Post-Header. The Body has the following + four components. + + <table> + <caption>Body for Slave_log_event</caption> + + <tr> + <th>Name</th> + <th>Size<br/></th> + <th>Description</th> + </tr> + + <tr> + <td>master_pos</td> + <td>8 byte integer</td> + <td>???TODO + </td> + </tr> + + <tr> + <td>master_port</td> + <td>2 byte integer</td> + <td>???TODO</td> + </tr> + + <tr> + <td>master_host</td> + <td>null-terminated string</td> + <td>???TODO</td> + </tr> + + <tr> + <td>master_log</td> + <td>null-terminated string</td> + <td>???TODO</td> + </tr> + </table> +*/ class Slave_log_event: public Log_event { protected: @@ -1165,11 +1656,202 @@ private: #endif /* HAVE_REPLICATION */ -/***************************************************************************** - - Load Log Event class +/** + @class Load_log_event + + This log event corresponds to a "LOAD DATA INFILE" SQL query on the + following form: + + @verbatim + (1) USE db; + (2) LOAD DATA [LOCAL] INFILE 'file_name' + (3) [REPLACE | IGNORE] + (4) INTO TABLE 'table_name' + (5) [FIELDS + (6) [TERMINATED BY 'field_term'] + (7) [[OPTIONALLY] ENCLOSED BY 'enclosed'] + (8) [ESCAPED BY 'escaped'] + (9) ] + (10) [LINES + (11) [TERMINATED BY 'line_term'] + (12) [LINES STARTING BY 'line_start'] + (13) ] + (14) [IGNORE skip_lines LINES] + (15) (field_1, field_2, ..., field_n)@endverbatim + + @section Load_log_event_binary_format Binary Format + + The Post-Header consists of the following six components. 
+ + <table> + <caption>Post-Header for Load_log_event</caption> + + <tr> + <th>Name</th> + <th>Size<br/></th> + <th>Description</th> + </tr> + + <tr> + <td>slave_proxy_id</td> + <td>4 byte unsigned integer</td> + <td>An integer identifying the client thread, which is unique on + the server. (Note, however, that the same slave_proxy_id may + appear on different servers.) This is used when a client thread + creates a temporary table. Temporary tables are local to the + client, and the slave_proxy_id is used to distinguish temporary + tables belonging to different clients. + </td> + </tr> + + <tr> + <td>exec_time</td> + <td>4 byte unsigned integer</td> + <td>???TODO</td> + </tr> + + <tr> + <td>skip_lines</td> + <td>4 byte unsigned integer</td> + <td>The number on line (14) above, if present, or 0 if line (14) + is left out. + </td> + </tr> + + <tr> + <td>table_name_len</td> + <td>1 byte unsigned integer</td> + <td>The length of 'table_name' on line (4) above.</td> + </tr> + + <tr> + <td>db_len</td> + <td>1 byte unsigned integer</td> + <td>The length of 'db' on line (1) above.</td> + </tr> + + <tr> + <td>num_fields</td> + <td>4 byte unsigned integer</td> + <td>The number n of fields on line (15) above.</td> + </tr> + </table> + + The Body contains the following components. + + <table> + <caption>Body of Load_log_event</caption> + + <tr> + <th>Name</th> + <th>Size<br/></th> + <th>Description</th> + </tr> + + <tr> + <td>sql_ex</td> + <td>variable length</td> + + <td>Describes the part of the query on lines (3) and + (5)–(13) above. More precisely, it stores the five strings + (on lines) field_term (6), enclosed (7), escaped (8), line_term + (11), and line_start (12); as well as a bitfield indicating the + presence of the keywords REPLACE (3), IGNORE (3), and OPTIONALLY + (7). + + The data is stored in one of two formats, called "old" and "new". 
+ The type field of Common-Header determines which of these two + formats is used: type LOAD_EVENT means that the old format is + used, and type NEW_LOAD_EVENT means that the new format is used. + When MySQL writes a Load_log_event, it uses the new format if at + least one of the five strings is two or more bytes long. + Otherwise (i.e., if all strings are 0 or 1 bytes long), the old + format is used. + + The new and old format differ in the way the five strings are + stored. + + <ul> + <li> In the new format, the strings are stored in the order + field_term, enclosed, escaped, line_term, line_start. Each string + consists of a length (1 byte), followed by a sequence of + characters (0-255 bytes). Finally, a boolean combination of the + following flags is stored in 1 byte: REPLACE_FLAG==0x4, + IGNORE_FLAG==0x8, and OPT_ENCLOSED_FLAG==0x2. If a flag is set, + it indicates the presence of the corresponding keyword in the SQL + query. + + <li> In the old format, we know that each string has length 0 or + 1. Therefore, only the first byte of each string is stored. The + order of the strings is the same as in the new format. These five + bytes are followed by the same 1-byte bitfield as in the new + format. Finally, a 1 byte bitfield called empty_flags is stored. + The low 5 bits of empty_flags indicate which of the five strings + have length 0. For each of the following flags that is set, the + corresponding string has length 0; for the flags that are not set, + the string has length 1: FIELD_TERM_EMPTY==0x1, + ENCLOSED_EMPTY==0x2, LINE_TERM_EMPTY==0x4, LINE_START_EMPTY==0x8, + ESCAPED_EMPTY==0x10. + </ul> + + Thus, the size of the new format is 6 bytes + the sum of the sizes + of the five strings. The size of the old format is always 7 + bytes. + </td> + </tr> + + <tr> + <td>field_lens</td> + <td>num_fields 1-byte unsigned integers</td> + <td>An array of num_fields integers representing the length of + each field in the query. (num_fields is from the Post-Header). 
+ </td> + </tr> + + <tr> + <td>fields</td> + <td>num_fields null-terminated strings</td> + <td>An array of num_fields null-terminated strings, each + representing a field in the query. (The trailing zero is + redundant, since the length are stored in the num_fields array.) + The total length of all strings equals to the sum of all + field_lens, plus num_fields bytes for all the trailing zeros. + </td> + </tr> + + <tr> + <td>table_name</td> + <td>null-terminated string of length table_len+1 bytes</td> + <td>The 'table_name' from the query, as a null-terminated string. + (The trailing zero is actually redundant since the table_len is + known from Post-Header.) + </td> + </tr> + + <tr> + <td>db</td> + <td>null-terminated string of length db_len+1 bytes</td> + <td>The 'db' from the query, as a null-terminated string. + (The trailing zero is actually redundant since the db_len is known + from Post-Header.) + </td> + </tr> + + <tr> + <td>file_name</td> + <td>variable length string without trailing zero, extending to the + end of the event (determined by the length field of the + Common-Header) + </td> + <td>The 'file_name' from the query. + </td> + </tr> + + </table> + + @subsection Load_log_event_notes_on_previous_versions Notes on Previous Versions - ****************************************************************************/ +*/ class Load_log_event: public Log_event { private: @@ -1276,9 +1958,8 @@ public: /* !!! Public in this patch to allow old usage */ extern char server_version[SERVER_VERSION_LENGTH]; -/***************************************************************************** - - Start Log Event_v3 class +/** + @class Start_log_event_v3 Start_log_event_v3 is the Start_log_event of binlog format 3 (MySQL 3.23 and 4.x). @@ -1288,8 +1969,8 @@ extern char server_version[SERVER_VERSION_LENGTH]; MySQL 5.0 whenever it starts sending a new binlog if the requested position is >4 (otherwise if ==4 the event will be sent naturally). 
- ****************************************************************************/ - + @section Start_log_event_v3_binary_format Binary Format +*/ class Start_log_event_v3: public Log_event { public: @@ -1372,10 +2053,14 @@ protected: }; -/* - For binlog version 4. - This event is saved by threads which read it, as they need it for future - use (to decode the ordinary events). +/** + @class Format_description_log_event + + For binlog version 4. + This event is saved by threads which read it, as they need it for future + use (to decode the ordinary events). + + @section Format_description_log_event_binary_format Binary Format */ class Format_description_log_event: public Start_log_event_v3 @@ -1429,13 +2114,41 @@ protected: }; -/***************************************************************************** +/** + @class Intvar_log_event - Intvar Log Event class + Logs special variables related to auto_increment values. - Logs special variables such as auto_increment values + @section Intvar_log_event_binary_format Binary Format - ****************************************************************************/ + The Post-Header has two components: + + <table> + <caption>Post-Header for Intvar_log_event</caption> + + <tr> + <th>Name</th> + <th>Size<br/></th> + <th>Description</th> + </tr> + + <tr> + <td>Type</td> + <td>1 byte enumeration</td> + <td>One byte identifying the type of variable stored. Currently, + two identifiers are supported: LAST_INSERT_ID_EVENT==1 and + INSERT_ID_EVENT==2. + </td> + </tr> + + <tr> + <td>value</td> + <td>8 byte unsigned integer</td> + <td>The value of the variable.</td> + </tr> + + </table> +*/ class Intvar_log_event: public Log_event { @@ -1474,16 +2187,24 @@ private: }; -/***************************************************************************** - - Rand Log Event class +/** + @class Rand_log_event Logs random seed used by the next RAND(), and by PASSWORD() in 4.1.0. 
4.1.1 does not need it (it's repeatable again) so this event needn't be written in 4.1.1 for PASSWORD() (but the fact that it is written is just a waste, it does not cause bugs). - ****************************************************************************/ + @section Rand_log_event_binary_format Binary Format + This event type has no Post-Header. The Body of this event type has + two components: + + @li seed1 (8 bytes): 64 bit random seed1. + @li seed2 (8 bytes): 64 bit random seed2. + + The state of the random number generation consists of 128 bits, + which are stored internally as two 64-bit numbers. +*/ class Rand_log_event: public Log_event { @@ -1520,14 +2241,14 @@ private: #endif }; -/***************************************************************************** - - Xid Log Event class +/** + @class Xid_log_event Logs xid of the transaction-to-be-committed in the 2pc protocol. Has no meaning in replication, slaves ignore it. - ****************************************************************************/ + @section Xid_log_event_binary_format Binary Format +*/ #ifdef MYSQL_CLIENT typedef ulonglong my_xid; // this line is the same as in handler.h #endif @@ -1559,17 +2280,18 @@ class Xid_log_event: public Log_event private: #if !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION) virtual int do_apply_event(Relay_log_info const *rli); + enum_skip_reason do_shall_skip(Relay_log_info *rli); #endif }; -/***************************************************************************** - - User var Log Event class +/** + @class User_var_log_event Every time a query uses the value of a user variable, a User_var_log_event is written before the Query_log_event, to set the user variable. 
- ****************************************************************************/ + @section User_var_log_event_binary_format Binary Format +*/ class User_var_log_event: public Log_event { @@ -1611,11 +2333,14 @@ private: }; -/***************************************************************************** +/** + @class Stop_log_event - Stop Log Event class + @section Stop_log_event_binary_format Binary Format - ****************************************************************************/ + The Post-Header and Body for this event type are empty; it only has + the Common-Header. +*/ class Stop_log_event: public Log_event { public: @@ -1651,13 +2376,54 @@ private: #endif }; -/***************************************************************************** - - Rotate Log Event class +/** + @class Rotate_log_event This will be deprecated when we move to using sequence ids. - ****************************************************************************/ + @section Rotate_log_event_binary_format Binary Format + + The Post-Header has one component: + + <table> + <caption>Post-Header for Rotate_log_event</caption> + + <tr> + <th>Name</th> + <th>Size<br/></th> + <th>Description</th> + </tr> + + <tr> + <td>pos</td> + <td>8 byte integer</td> + <td>???TODO</td> + </tr> + + </table> + + The Body has one component: + + <table> + <caption>Body for Rotate_log_event</caption> + + <tr> + <th>Name</th> + <th>Size<br/></th> + <th>Description</th> + </tr> + + <tr> + <td>new_log_ident</td> + <td>variable length string without trailing zero, extending to the + end of the event (determined by the length field of the + Common-Header) + </td> + <td>???TODO</td> + </tr> + + </table> +*/ class Rotate_log_event: public Log_event { @@ -1704,9 +2470,11 @@ private: /* the classes below are for the new LOAD DATA INFILE logging */ -/***************************************************************************** - Create File Log Event class - 
****************************************************************************/ +/** + @class Create_file_log_event + + @section Create_file_log_event_binary_format Binary Format +*/ class Create_file_log_event: public Load_log_event { @@ -1775,11 +2543,11 @@ private: }; -/***************************************************************************** - - Append Block Log Event class +/** + @class Append_block_log_event - ****************************************************************************/ + @section Append_block_log_event_binary_format Binary Format +*/ class Append_block_log_event: public Log_event { @@ -1830,11 +2598,11 @@ private: }; -/***************************************************************************** - - Delete File Log Event class +/** + @class Delete_file_log_event - ****************************************************************************/ + @section Delete_file_log_event_binary_format Binary Format +*/ class Delete_file_log_event: public Log_event { @@ -1871,11 +2639,11 @@ private: }; -/***************************************************************************** - - Execute Load Log Event class +/** + @class Execute_load_log_event - ****************************************************************************/ + @section Execute_load_log_event_binary_format Binary Format +*/ class Execute_load_log_event: public Log_event { @@ -1911,15 +2679,15 @@ private: }; -/*************************************************************************** - - Begin load query Log Event class +/** + @class Begin_load_query_log_event Event for the first block of file to be loaded, its only difference from Append_block event is that this event creates or truncates existing file before writing data. 
-****************************************************************************/ + @section Begin_load_query_log_event_binary_format Binary Format +*/ class Begin_load_query_log_event: public Append_block_log_event { public: @@ -1937,6 +2705,10 @@ public: *description_event); ~Begin_load_query_log_event() {} Log_event_type get_type_code() { return BEGIN_LOAD_QUERY_EVENT; } +private: +#if !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION) + virtual enum_skip_reason do_shall_skip(Relay_log_info *rli); +#endif }; @@ -1946,15 +2718,15 @@ public: enum enum_load_dup_handling { LOAD_DUP_ERROR= 0, LOAD_DUP_IGNORE, LOAD_DUP_REPLACE }; -/**************************************************************************** - - Execute load query Log Event class +/** + @class Execute_load_query_log_event Event responsible for LOAD DATA execution, it similar to Query_log_event but before executing the query it substitutes original filename in LOAD DATA query with name of temporary file. -****************************************************************************/ + @section Execute_load_query_log_event_binary_format Binary Format +*/ class Execute_load_query_log_event: public Query_log_event { public: @@ -1972,10 +2744,12 @@ public: #ifndef MYSQL_CLIENT Execute_load_query_log_event(THD* thd, const char* query_arg, - ulong query_length, uint fn_pos_start_arg, - uint fn_pos_end_arg, - enum_load_dup_handling dup_handling_arg, - bool using_trans, bool suppress_use); + ulong query_length, uint fn_pos_start_arg, + uint fn_pos_end_arg, + enum_load_dup_handling dup_handling_arg, + bool using_trans, bool suppress_use, + THD::killed_state + killed_err_arg= THD::KILLED_NO_VALUE); #ifdef HAVE_REPLICATION void pack_info(Protocol* protocol); #endif /* HAVE_REPLICATION */ @@ -2006,6 +2780,11 @@ private: #ifdef MYSQL_CLIENT +/** + @class Unknown_log_event + + @section Unknown_log_event_binary_format Binary Format +*/ class Unknown_log_event: public Log_event { public: @@ -2026,14 +2805,14 @@ public: 
#endif char *str_to_hex(char *to, const char *from, uint len); -/***************************************************************************** - - Table map log event class +/** + @class Table_map_log_event Create a mapping from a (database name, table name) couple to a table identifier (an integer number). - ****************************************************************************/ + @section Table_map_log_event_binary_format Binary Format +*/ class Table_map_log_event : public Log_event { public: @@ -2143,9 +2922,8 @@ private: }; -/***************************************************************************** - - Row level log event class. +/** + @class Rows_log_event Common base class for all row-containing log events. @@ -2155,7 +2933,8 @@ private: - Write data header and data body to an IO_CACHE. - Provide an interface for adding an individual row to the event. - ****************************************************************************/ + @section Rows_log_event_binary_format Binary Format +*/ class Rows_log_event : public Log_event @@ -2300,7 +3079,7 @@ protected: uchar *m_rows_cur; /* One-after the end of the data */ uchar *m_rows_end; /* One-after the end of the allocated space */ - flag_set m_flags; /* Flags for row-level events */ + flag_set m_flags; /* Flags for row-level events */ /* helper functions */ @@ -2316,8 +3095,11 @@ protected: int unpack_current_row(const Relay_log_info *const rli) { DBUG_ASSERT(m_table); - return ::unpack_row(rli, m_table, m_width, m_curr_row, &m_cols, - &m_curr_row_end, &m_master_reclength); + ASSERT_OR_RETURN_ERROR(m_curr_row < m_rows_end, HA_ERR_CORRUPT_EVENT); + int const result= ::unpack_row(rli, m_table, m_width, m_curr_row, &m_cols, + &m_curr_row_end, &m_master_reclength); + ASSERT_OR_RETURN_ERROR(m_curr_row_end <= m_rows_end, HA_ERR_CORRUPT_EVENT); + return result; } #endif @@ -2383,15 +3165,15 @@ private: friend class Old_rows_log_event; }; 
-/***************************************************************************** - - Write row log event class +/** + @class Write_rows_log_event Log row insertions and updates. The event contain several insert/update rows for a table. Note that each event contains only rows for one table. - ****************************************************************************/ + @section Write_rows_log_event_binary_format Binary Format +*/ class Write_rows_log_event : public Rows_log_event { public: @@ -2438,9 +3220,8 @@ private: }; -/***************************************************************************** - - Update rows log event class +/** + @class Update_rows_log_event Log row updates with a before image. The event contain several update rows for a table. Note that each event contains only rows for @@ -2449,7 +3230,8 @@ private: Also note that the row data consists of pairs of row data: one row for the old data and one row for the new data. - ****************************************************************************/ + @section Update_rows_log_event_binary_format Binary Format +*/ class Update_rows_log_event : public Rows_log_event { public: @@ -2511,9 +3293,8 @@ protected: #endif /* !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION) */ }; -/***************************************************************************** - - Delete rows log event class. +/** + @class Delete_rows_log_event Log row deletions. The event contain several delete rows for a table. Note that each event contains only rows for one table. @@ -2530,7 +3311,8 @@ protected: Row_reader Extract the rows from the event. 
- ****************************************************************************/ + @section Delete_rows_log_event_binary_format Binary Format +*/ class Delete_rows_log_event : public Rows_log_event { public: @@ -2580,6 +3362,8 @@ protected: #include "log_event_old.h" /** + @class Incident_log_event + Class representing an incident, an occurance out of the ordinary, that happened on the master. @@ -2591,7 +3375,7 @@ protected: <caption>Incident event format</caption> <tr> <th>Symbol</th> - <th>Size<br>(bytes)</th> + <th>Size<br/>(bytes)</th> <th>Description</th> </tr> <tr> @@ -2610,7 +3394,9 @@ protected: <td>The message, if present. Not null terminated.</td> </tr> </table> - */ + + @section Delete_rows_log_event_binary_format Binary Format +*/ class Incident_log_event : public Log_event { public: #ifndef MYSQL_CLIENT diff --git a/sql/mysql_priv.h b/sql/mysql_priv.h index f6d6c3ae2a2..6f3dda8afdf 100644 --- a/sql/mysql_priv.h +++ b/sql/mysql_priv.h @@ -688,7 +688,6 @@ bool check_single_table_access(THD *thd, ulong privilege, bool check_routine_access(THD *thd,ulong want_access,char *db,char *name, bool is_proc, bool no_errors); bool check_some_access(THD *thd, ulong want_access, TABLE_LIST *table); -bool check_merge_table_access(THD *thd, char *db, TABLE_LIST *table_list); bool check_some_routine_access(THD *thd, const char *db, const char *name, bool is_proc); #else inline bool check_one_table_access(THD *thd, ulong privilege, TABLE_LIST *tables) @@ -999,7 +998,8 @@ bool check_dup(const char *db, const char *name, TABLE_LIST *tables); bool compare_record(TABLE *table); bool append_file_to_dir(THD *thd, const char **filename_ptr, const char *table_name); - +void wait_while_table_is_used(THD *thd, TABLE *table, + enum ha_extra_function function); bool table_cache_init(void); void table_cache_free(void); bool table_def_init(void); @@ -1178,12 +1178,16 @@ TABLE *open_ltable(THD *thd, TABLE_LIST *table_list, thr_lock_type update, uint lock_flags); TABLE *open_table(THD 
*thd, TABLE_LIST *table_list, MEM_ROOT* mem, bool *refresh, uint flags); +bool name_lock_locked_table(THD *thd, TABLE_LIST *tables); bool reopen_name_locked_table(THD* thd, TABLE_LIST* table_list, bool link_in); TABLE *table_cache_insert_placeholder(THD *thd, const char *key, uint key_length); bool lock_table_name_if_not_cached(THD *thd, const char *db, const char *table_name, TABLE **table); TABLE *find_locked_table(THD *thd, const char *db,const char *table_name); +void detach_merge_children(TABLE *table, bool clear_refs); +bool fix_merge_after_open(TABLE_LIST *old_child_list, TABLE_LIST **old_last, + TABLE_LIST *new_child_list, TABLE_LIST **new_last); bool reopen_table(TABLE *table); bool reopen_tables(THD *thd,bool get_locks,bool in_refresh); void close_data_files_and_morph_locks(THD *thd, const char *db, @@ -1325,12 +1329,9 @@ bool mysql_ha_open(THD *thd, TABLE_LIST *tables, bool reopen); bool mysql_ha_close(THD *thd, TABLE_LIST *tables); bool mysql_ha_read(THD *, TABLE_LIST *,enum enum_ha_read_modes,char *, List<Item> *,enum ha_rkey_function,Item *,ha_rows,ha_rows); -int mysql_ha_flush(THD *thd, TABLE_LIST *tables, uint mode_flags, - bool is_locked); -/* mysql_ha_flush mode_flags bits */ -#define MYSQL_HA_CLOSE_FINAL 0x00 -#define MYSQL_HA_REOPEN_ON_USAGE 0x01 -#define MYSQL_HA_FLUSH_ALL 0x02 +void mysql_ha_flush(THD *thd); +void mysql_ha_rm_tables(THD *thd, TABLE_LIST *tables, bool is_locked); +void mysql_ha_cleanup(THD *thd); /* sql_base.cc */ #define TMP_TABLE_KEY_EXTRA 8 @@ -1434,8 +1435,21 @@ int init_ftfuncs(THD *thd, SELECT_LEX* select, bool no_order); void wait_for_condition(THD *thd, pthread_mutex_t *mutex, pthread_cond_t *cond); int open_tables(THD *thd, TABLE_LIST **tables, uint *counter, uint flags); -int simple_open_n_lock_tables(THD *thd,TABLE_LIST *tables); -bool open_and_lock_tables(THD *thd,TABLE_LIST *tables); +/* open_and_lock_tables with optional derived handling */ +bool open_and_lock_tables_derived(THD *thd, TABLE_LIST *tables, bool 
derived); +/* simple open_and_lock_tables without derived handling */ +inline bool simple_open_n_lock_tables(THD *thd, TABLE_LIST *tables) +{ + return open_and_lock_tables_derived(thd, tables, FALSE); +} +/* open_and_lock_tables with derived handling */ +inline bool open_and_lock_tables(THD *thd, TABLE_LIST *tables) +{ + return open_and_lock_tables_derived(thd, tables, TRUE); +} +/* simple open_and_lock_tables without derived handling for single table */ +TABLE *open_n_lock_single_table(THD *thd, TABLE_LIST *table_l, + thr_lock_type lock_type); bool open_normal_and_derived_tables(THD *thd, TABLE_LIST *tables, uint flags); int lock_tables(THD *thd, TABLE_LIST *tables, uint counter, bool *need_reopen); int decide_logging_format(THD *thd, TABLE_LIST *tables); @@ -2012,7 +2026,8 @@ int format_number(uint inputflag,uint max_length,char * pos,uint length, /* table.cc */ TABLE_SHARE *alloc_table_share(TABLE_LIST *table_list, char *key, uint key_length); -void init_tmp_table_share(TABLE_SHARE *share, const char *key, uint key_length, +void init_tmp_table_share(THD *thd, TABLE_SHARE *share, const char *key, + uint key_length, const char *table_name, const char *path); void free_table_share(TABLE_SHARE *share); int open_table_def(THD *thd, TABLE_SHARE *share, uint db_flags); diff --git a/sql/mysqld.cc b/sql/mysqld.cc index de957b1594c..f27d3bc5fd5 100644 --- a/sql/mysqld.cc +++ b/sql/mysqld.cc @@ -2612,7 +2612,12 @@ int my_message_sql(uint error, const char *str, myf MyFlags) thd->is_slave_error= 1; // needed to catch query errors during replication if (!thd->no_warnings_for_error) + { + thd->no_warnings_for_error= TRUE; push_warning(thd, MYSQL_ERROR::WARN_LEVEL_ERROR, error, str); + thd->no_warnings_for_error= FALSE; + } + /* thd->lex->current_select == 0 if lex structure is not inited (not query command (COM_QUERY)) @@ -2952,7 +2957,6 @@ static int init_common_variables(const char *conf_file_name, int argc, global_system_variables.collation_connection= 
default_charset_info; global_system_variables.character_set_results= default_charset_info; global_system_variables.character_set_client= default_charset_info; - global_system_variables.collation_connection= default_charset_info; if (!(character_set_filesystem= get_charset_by_csname(character_set_filesystem_name, @@ -4614,8 +4618,13 @@ pthread_handler_t handle_connections_sockets(void *arg __attribute__((unused))) sock == unix_sock ? VIO_LOCALHOST: 0)) || my_net_init(&thd->net,vio_tmp)) { - if (vio_tmp) - vio_delete(vio_tmp); + /* + Only delete the temporary vio if we didn't already attach it to the + NET object. The destructor in THD will delete any initialized net + structure. + */ + if (vio_tmp && thd->net.vio != vio_tmp) + vio_delete(vio_tmp); else { (void) shutdown(new_sock, SHUT_RDWR); @@ -7762,12 +7771,13 @@ mysqld_get_one_option(int optid, break; } case OPT_ONE_THREAD: - global_system_variables.thread_handling= 2; + global_system_variables.thread_handling= + SCHEDULER_ONE_THREAD_PER_CONNECTION; break; case OPT_THREAD_HANDLING: { global_system_variables.thread_handling= - find_type_or_exit(argument, &thread_handling_typelib, opt->name); + find_type_or_exit(argument, &thread_handling_typelib, opt->name)-1; break; } case OPT_FT_BOOLEAN_SYNTAX: diff --git a/sql/opt_range.cc b/sql/opt_range.cc index a56fd9aad64..732f947c44e 100644 --- a/sql/opt_range.cc +++ b/sql/opt_range.cc @@ -276,6 +276,9 @@ public: Field *field; uchar *min_value,*max_value; // Pointer to range + /* + eq_tree() requires that left == right == 0 if the type is MAYBE_KEY. 
+ */ SEL_ARG *left,*right; /* R-B tree children */ SEL_ARG *next,*prev; /* Links for bi-directional interval list */ SEL_ARG *parent; /* R-B tree parent */ @@ -291,7 +294,7 @@ public: SEL_ARG(Field *field, uint8 part, uchar *min_value, uchar *max_value, uint8 min_flag, uint8 max_flag, uint8 maybe_flag); SEL_ARG(enum Type type_arg) - :min_flag(0),elements(1),use_count(1),left(0),next_key_part(0), + :min_flag(0),elements(1),use_count(1),left(0),right(0),next_key_part(0), color(BLACK), type(type_arg) {} inline bool is_same(SEL_ARG *arg) @@ -4469,8 +4472,8 @@ TRP_ROR_INTERSECT *get_best_ror_intersect(const PARAM *param, SEL_TREE *tree, ROR_SCAN_INFO's. Step 2: Get best ROR-intersection using an approximate algorithm. */ - qsort(tree->ror_scans, tree->n_ror_scans, sizeof(ROR_SCAN_INFO*), - (qsort_cmp)cmp_ror_scan_info); + my_qsort(tree->ror_scans, tree->n_ror_scans, sizeof(ROR_SCAN_INFO*), + (qsort_cmp)cmp_ror_scan_info); DBUG_EXECUTE("info",print_ror_scans_arr(param->table, "ordered", tree->ror_scans, tree->ror_scans_end);); @@ -4662,8 +4665,8 @@ TRP_ROR_INTERSECT *get_best_covering_ror_intersect(PARAM *param, bitmap_get_first(&(*scan)->covered_fields); } - qsort(ror_scan_mark, ror_scans_end-ror_scan_mark, sizeof(ROR_SCAN_INFO*), - (qsort_cmp)cmp_ror_scan_info_covering); + my_qsort(ror_scan_mark, ror_scans_end-ror_scan_mark, sizeof(ROR_SCAN_INFO*), + (qsort_cmp)cmp_ror_scan_info_covering); DBUG_EXECUTE("info", print_ror_scans_arr(param->table, "remaining scans", @@ -5732,6 +5735,7 @@ get_mm_leaf(RANGE_OPT_PARAM *param, COND *conf_func, Field *field, { tree= new (alloc) SEL_ARG(field, 0, 0); tree->type= SEL_ARG::IMPOSSIBLE; + goto end; } else { @@ -5740,8 +5744,32 @@ get_mm_leaf(RANGE_OPT_PARAM *param, COND *conf_func, Field *field, for the cases like int_field > 999999999999999999999999 as well. 
*/ tree= 0; + if (err == 3 && field->type() == FIELD_TYPE_DATE && + (type == Item_func::GT_FUNC || type == Item_func::GE_FUNC || + type == Item_func::LT_FUNC || type == Item_func::LE_FUNC) ) + { + /* + We were saving DATETIME into a DATE column, the conversion went ok + but a non-zero time part was cut off. + + In MySQL's SQL dialect, DATE and DATETIME are compared as datetime + values. Index over a DATE column uses DATE comparison. Changing + from one comparison to the other is possible: + + datetime(date_col)< '2007-12-10 12:34:55' -> date_col<='2007-12-10' + datetime(date_col)<='2007-12-10 12:34:55' -> date_col<='2007-12-10' + + datetime(date_col)> '2007-12-10 12:34:55' -> date_col>='2007-12-10' + datetime(date_col)>='2007-12-10 12:34:55' -> date_col>='2007-12-10' + + but we'll need to convert '>' to '>=' and '<' to '<='. This will + be done together with other types at the end of this function + (grep for field_is_equal_to_item) + */ + } + else + goto end; } - goto end; } if (err < 0) { diff --git a/sql/partition_info.cc b/sql/partition_info.cc index ab887d5dda0..3b580422da1 100644 --- a/sql/partition_info.cc +++ b/sql/partition_info.cc @@ -524,6 +524,13 @@ bool partition_info::check_range_constants() current_largest= part_range_value; range_int_array[i]= part_range_value; } + else if (defined_max_value && + current_largest == part_range_value && + part_range_value == LONGLONG_MAX && + i == (no_parts - 1)) + { + range_int_array[i]= part_range_value; + } else { my_error(ER_RANGE_NOT_INCREASING_ERROR, MYF(0)); @@ -666,8 +673,8 @@ bool partition_info::check_list_constants() if (fixed && no_list_values) { bool first= TRUE; - qsort((void*)list_array, no_list_values, sizeof(LIST_PART_ENTRY), - &list_part_cmp); + my_qsort((void*)list_array, no_list_values, sizeof(LIST_PART_ENTRY), + &list_part_cmp); i= 0; LINT_INIT(prev_value); diff --git a/sql/protocol.cc b/sql/protocol.cc index 4e727799773..c147b68ca1f 100644 --- a/sql/protocol.cc +++ b/sql/protocol.cc @@ -76,6 
+76,12 @@ void net_send_error(THD *thd, uint sql_errno, const char *err) DBUG_ASSERT(!thd->spcont); + if (thd->killed == THD::KILL_QUERY || thd->killed == THD::KILL_BAD_DATA) + { + thd->killed= THD::NOT_KILLED; + thd->mysys_var->abort= 0; + } + if (net && net->no_send_error) { thd->clear_error(); diff --git a/sql/records.cc b/sql/records.cc index 81c26da4b4d..349cc4a8329 100644 --- a/sql/records.cc +++ b/sql/records.cc @@ -55,6 +55,7 @@ static int rr_index(READ_RECORD *info); void init_read_record_idx(READ_RECORD *info, THD *thd, TABLE *table, bool print_error, uint idx) { + empty_record(table); bzero((char*) info,sizeof(*info)); info->table= table; info->file= table->file; @@ -161,6 +162,7 @@ void init_read_record(READ_RECORD *info,THD *thd, TABLE *table, } else { + empty_record(table); info->record= table->record[0]; info->ref_length= table->file->ref_length; } @@ -565,7 +567,8 @@ static int rr_from_cache(READ_RECORD *info) int3store(ref_position,(long) i); ref_position+=3; } - qsort(info->read_positions,length,info->struct_length,(qsort_cmp) rr_cmp); + my_qsort(info->read_positions, length, info->struct_length, + (qsort_cmp) rr_cmp); position=info->read_positions; for (i=0 ; i < length ; i++) diff --git a/sql/repl_failsafe.cc b/sql/repl_failsafe.cc index 834d87532af..453c26725d2 100644 --- a/sql/repl_failsafe.cc +++ b/sql/repl_failsafe.cc @@ -925,7 +925,7 @@ bool load_master_data(THD* thd) 0, (SLAVE_IO | SLAVE_SQL))) my_message(ER_MASTER_INFO, ER(ER_MASTER_INFO), MYF(0)); strmake(active_mi->master_log_name, row[0], - sizeof(active_mi->master_log_name)); + sizeof(active_mi->master_log_name) -1); active_mi->master_log_pos= my_strtoll10(row[1], (char**) 0, &error_2); /* at least in recent versions, the condition below should be false */ if (active_mi->master_log_pos < BIN_LOG_HEADER_SIZE) diff --git a/sql/rpl_record.cc b/sql/rpl_record.cc index 65c8e106112..ed0dc82cf01 100644 --- a/sql/rpl_record.cc +++ b/sql/rpl_record.cc @@ -65,6 +65,8 @@ pack_row(TABLE *table, 
MY_BITMAP const* cols, my_ptrdiff_t const rec_offset= record - table->record[0]; my_ptrdiff_t const def_offset= table->s->default_values - table->record[0]; + DBUG_ENTER("pack_row"); + /* We write the null bits and the packed records using one pass through all the fields. The null bytes are written little-endian, @@ -96,26 +98,17 @@ pack_row(TABLE *table, MY_BITMAP const* cols, For big-endian machines, we have to make sure that the length is stored in little-endian format, since this is the format used for the binlog. - - We do this by setting the db_low_byte_first, which is used - inside some store_length() to decide what order to write the - bytes in. - - In reality, db_log_byte_first is only set for legacy table - type Isam, but in the event of a bug, we need to guarantee - the endianess when writing to the binlog. - - This is currently broken for NDB due to BUG#29549, so we - will fix it when NDB has fixed their way of handling BLOBs. */ -#if 0 - bool save= table->s->db_low_byte_first; - table->s->db_low_byte_first= TRUE; -#endif - pack_ptr= field->pack(pack_ptr, field->ptr + offset); -#if 0 - table->s->db_low_byte_first= save; +#ifndef DBUG_OFF + const uchar *old_pack_ptr= pack_ptr; #endif + pack_ptr= field->pack(pack_ptr, field->ptr + offset, + field->max_data_length(), TRUE); + DBUG_PRINT("debug", ("field: %s; pack_ptr: 0x%lx;" + " pack_ptr':0x%lx; bytes: %d", + field->field_name, (ulong) old_pack_ptr, + (ulong) pack_ptr, + (int) (pack_ptr - old_pack_ptr))); } null_mask <<= 1; @@ -143,8 +136,8 @@ pack_row(TABLE *table, MY_BITMAP const* cols, packed data. If it doesn't, something is very wrong. 
*/ DBUG_ASSERT(null_ptr == row_data + null_byte_count); - - return static_cast<size_t>(pack_ptr - row_data); + DBUG_DUMP("row_data", row_data, pack_ptr - row_data); + DBUG_RETURN(static_cast<size_t>(pack_ptr - row_data)); } #endif @@ -242,18 +235,16 @@ unpack_row(Relay_log_info const *rli, Use the master's size information if available else call normal unpack operation. */ -#if 0 - bool save= table->s->db_low_byte_first; - table->s->db_low_byte_first= TRUE; -#endif uint16 const metadata= tabledef->field_metadata(i); - if (tabledef && metadata) - pack_ptr= f->unpack(f->ptr, pack_ptr, metadata); - else - pack_ptr= f->unpack(f->ptr, pack_ptr); -#if 0 - table->s->db_low_byte_first= save; +#ifndef DBUG_OFF + uchar const *const old_pack_ptr= pack_ptr; #endif + pack_ptr= f->unpack(f->ptr, pack_ptr, metadata, TRUE); + DBUG_PRINT("debug", ("field: %s; metadata: 0x%x;" + " pack_ptr: 0x%lx; pack_ptr': 0x%lx; bytes: %d", + f->field_name, metadata, + (ulong) old_pack_ptr, (ulong) pack_ptr, + (int) (pack_ptr - old_pack_ptr))); } null_mask <<= 1; @@ -289,6 +280,8 @@ unpack_row(Relay_log_info const *rli, */ DBUG_ASSERT(null_ptr == row_data + master_null_byte_count); + DBUG_DUMP("row_data", row_data, pack_ptr - row_data); + *row_end = pack_ptr; if (master_reclength) { diff --git a/sql/rpl_rli.cc b/sql/rpl_rli.cc index 867d55a60a3..15d7d97affd 100644 --- a/sql/rpl_rli.cc +++ b/sql/rpl_rli.cc @@ -1082,6 +1082,9 @@ bool Relay_log_info::cached_charset_compare(char *charset) const void Relay_log_info::stmt_done(my_off_t event_master_log_pos, time_t event_creation_time) { +#ifndef DBUG_OFF + extern uint debug_not_change_ts_if_art_event; +#endif clear_flag(IN_STMT); /* @@ -1121,7 +1124,12 @@ void Relay_log_info::stmt_done(my_off_t event_master_log_pos, is that value may take some time to display in Seconds_Behind_Master - not critical). 
*/ - last_master_timestamp= event_creation_time; +#ifndef DBUG_OFF + if (!(event_creation_time == 0 && debug_not_change_ts_if_art_event > 0)) +#else + if (event_creation_time != 0) +#endif + last_master_timestamp= event_creation_time; } } diff --git a/sql/rpl_rli.h b/sql/rpl_rli.h index 10ecf1a43d4..a3a57ad4ce9 100644 --- a/sql/rpl_rli.h +++ b/sql/rpl_rli.h @@ -366,6 +366,18 @@ public: } /** + Get the value of a replication state flag. + + @param flag Flag to get value of + + @return @c true if the flag was set, @c false otherwise. + */ + bool get_flag(enum_state_flag flag) + { + return m_flags & (1UL << flag); + } + + /** Clear the value of a replication state flag. @param flag Flag to clear diff --git a/sql/rpl_utility.cc b/sql/rpl_utility.cc index d1ce5bf3b7b..b3ca26d4c2c 100644 --- a/sql/rpl_utility.cc +++ b/sql/rpl_utility.cc @@ -31,31 +31,34 @@ uint32 table_def::calc_field_size(uint col, uchar *master_data) const switch (type(col)) { case MYSQL_TYPE_NEWDECIMAL: length= my_decimal_get_binary_size(m_field_metadata[col] >> 8, - m_field_metadata[col] - ((m_field_metadata[col] >> 8) << 8)); + m_field_metadata[col] & 0xff); break; case MYSQL_TYPE_DECIMAL: case MYSQL_TYPE_FLOAT: case MYSQL_TYPE_DOUBLE: length= m_field_metadata[col]; break; + /* + The cases for SET and ENUM are include for completeness, however + both are mapped to type MYSQL_TYPE_STRING and their real types + are encoded in the field metadata. 
+ */ case MYSQL_TYPE_SET: case MYSQL_TYPE_ENUM: case MYSQL_TYPE_STRING: { - if (((m_field_metadata[col] & 0xff00) == (MYSQL_TYPE_SET << 8)) || - ((m_field_metadata[col] & 0xff00) == (MYSQL_TYPE_ENUM << 8))) + uchar type= m_field_metadata[col] >> 8U; + if ((type == MYSQL_TYPE_SET) || (type == MYSQL_TYPE_ENUM)) length= m_field_metadata[col] & 0x00ff; else { - length= m_field_metadata[col] & 0x00ff; - DBUG_ASSERT(length > 0); - if (length > 255) - { - DBUG_ASSERT(uint2korr(master_data) > 0); - length= uint2korr(master_data) + 2; - } - else - length= (uint) *master_data + 1; + /* + We are reading the actual size from the master_data record + because this field has the actual lengh stored in the first + byte. + */ + length= (uint) *master_data + 1; + DBUG_ASSERT(length != 0); } break; } @@ -95,6 +98,13 @@ uint32 table_def::calc_field_size(uint col, uchar *master_data) const break; case MYSQL_TYPE_BIT: { + /* + Decode the size of the bit field from the master. + from_len is the length in bytes from the master + from_bit_len is the number of extra bits stored in the master record + If from_bit_len is not 0, add 1 to the length to account for accurate + number of bytes needed. + */ uint from_len= (m_field_metadata[col] >> 8U) & 0x00ff; uint from_bit_len= m_field_metadata[col] & 0x00ff; DBUG_ASSERT(from_bit_len <= 7); @@ -136,7 +146,7 @@ uint32 table_def::calc_field_size(uint col, uchar *master_data) const length= *master_data; break; case 2: - length= sint2korr(master_data); + length= uint2korr(master_data); break; case 3: length= uint3korr(master_data); diff --git a/sql/rpl_utility.h b/sql/rpl_utility.h index 26edbdd1405..375715c7858 100644 --- a/sql/rpl_utility.h +++ b/sql/rpl_utility.h @@ -99,7 +99,7 @@ public: /* These types store a single byte. 
*/ - m_field_metadata[i]= (uchar)field_metadata[index]; + m_field_metadata[i]= field_metadata[index]; index++; break; } @@ -107,14 +107,14 @@ public: case MYSQL_TYPE_ENUM: case MYSQL_TYPE_STRING: { - short int x= field_metadata[index++] << 8U; // real_type - x = x + field_metadata[index++]; // pack or field length + uint16 x= field_metadata[index++] << 8U; // real_type + x+= field_metadata[index++]; // pack or field length m_field_metadata[i]= x; break; } case MYSQL_TYPE_BIT: { - short int x= field_metadata[index++]; + uint16 x= field_metadata[index++]; x = x + (field_metadata[index++] << 8U); m_field_metadata[i]= x; break; @@ -125,14 +125,14 @@ public: These types store two bytes. */ char *ptr= (char *)&field_metadata[index]; - m_field_metadata[i]= sint2korr(ptr); + m_field_metadata[i]= uint2korr(ptr); index= index + 2; break; } case MYSQL_TYPE_NEWDECIMAL: { - short int x= field_metadata[index++] << 8U; // precision - x = x + field_metadata[index++]; // decimals + uint16 x= field_metadata[index++] << 8U; // precision + x+= field_metadata[index++]; // decimals m_field_metadata[i]= x; break; } diff --git a/sql/scheduler.h b/sql/scheduler.h index 8351cefda4c..46bbd300cbb 100644 --- a/sql/scheduler.h +++ b/sql/scheduler.h @@ -40,7 +40,7 @@ public: enum scheduler_types { - SCHEDULER_ONE_THREAD_PER_CONNECTION=1, + SCHEDULER_ONE_THREAD_PER_CONNECTION=0, SCHEDULER_NO_THREADS, SCHEDULER_POOL_OF_THREADS }; diff --git a/sql/set_var.cc b/sql/set_var.cc index 8a6f0ac6285..f1826729914 100644 --- a/sql/set_var.cc +++ b/sql/set_var.cc @@ -168,7 +168,8 @@ static sys_var_character_set_sv sys_character_set_server(&vars, "character_set_s sys_var_const_str sys_charset_system(&vars, "character_set_system", (char *)my_charset_utf8_general_ci.name); static sys_var_character_set_database sys_character_set_database(&vars, "character_set_database"); -static sys_var_character_set_sv sys_character_set_client(&vars, "character_set_client", +static sys_var_character_set_client 
sys_character_set_client(&vars, + "character_set_client", &SV::character_set_client, &default_charset_info); static sys_var_character_set_sv sys_character_set_connection(&vars, "character_set_connection", @@ -1907,6 +1908,21 @@ CHARSET_INFO **sys_var_character_set_sv::ci_ptr(THD *thd, enum_var_type type) } +bool sys_var_character_set_client::check(THD *thd, set_var *var) +{ + if (sys_var_character_set_sv::check(thd, var)) + return 1; + /* Currently, UCS-2 cannot be used as a client character set */ + if (var->save_result.charset->mbminlen > 1) + { + my_error(ER_WRONG_VALUE_FOR_VAR, MYF(0), name, + var->save_result.charset->csname); + return 1; + } + return 0; +} + + CHARSET_INFO ** sys_var_character_set_database::ci_ptr(THD *thd, enum_var_type type) { @@ -2148,18 +2164,24 @@ void sys_var_log_state::set_default(THD *thd, enum_var_type type) static int sys_check_log_path(THD *thd, set_var *var) { - char path[FN_REFLEN]; + char path[FN_REFLEN], buff[FN_REFLEN]; MY_STAT f_stat; - const char *var_path= var->value->str_value.ptr(); + String str(buff, sizeof(buff), system_charset_info), *res; + const char *log_file_str; + + if (!(res= var->value->val_str(&str))) + goto err; + + log_file_str= res->c_ptr(); bzero(&f_stat, sizeof(MY_STAT)); - (void) unpack_filename(path, var_path); + (void) unpack_filename(path, log_file_str); if (my_stat(path, &f_stat, MYF(0))) { /* Check if argument is a file and we have 'write' permission */ if (!MY_S_ISREG(f_stat.st_mode) || !(f_stat.st_mode & MY_S_IWRITE)) - return -1; + goto err; } else { @@ -2168,11 +2190,16 @@ static int sys_check_log_path(THD *thd, set_var *var) Check if directory exists and we have permission to create file & write to file */ - (void) dirname_part(path, var_path, &path_length); + (void) dirname_part(path, log_file_str, &path_length); if (my_access(path, (F_OK|W_OK))) - return -1; + goto err; } return 0; + +err: + my_error(ER_WRONG_VALUE_FOR_VAR, MYF(0), var->var->name, + res ? 
log_file_str : "NULL"); + return 1; } @@ -2325,6 +2352,13 @@ uchar *sys_var_log_output::value_ptr(THD *thd, enum_var_type type, int set_var_collation_client::check(THD *thd) { + /* Currently, UCS-2 cannot be used as a client character set */ + if (character_set_client->mbminlen > 1) + { + my_error(ER_WRONG_VALUE_FOR_VAR, MYF(0), "character_set_client", + character_set_client->csname); + return 1; + } return 0; } @@ -2946,7 +2980,8 @@ SHOW_VAR* enumerate_sys_vars(THD *thd, bool sorted) /* sort into order */ if (sorted) - qsort(result, count + fixed_count, sizeof(SHOW_VAR), (qsort_cmp)show_cmp); + my_qsort(result, count + fixed_count, sizeof(SHOW_VAR), + (qsort_cmp) show_cmp); /* make last element empty */ bzero(show, sizeof(SHOW_VAR)); diff --git a/sql/set_var.h b/sql/set_var.h index 2881903133d..4e13409b7f6 100644 --- a/sql/set_var.h +++ b/sql/set_var.h @@ -668,6 +668,20 @@ public: }; +class sys_var_character_set_client: public sys_var_character_set_sv +{ +public: + sys_var_character_set_client(sys_var_chain *chain, const char *name_arg, + CHARSET_INFO *SV::*offset_arg, + CHARSET_INFO **global_default_arg, + bool is_nullable= 0) + : sys_var_character_set_sv(chain, name_arg, offset_arg, global_default_arg, + is_nullable) + { } + bool check(THD *thd, set_var *var); +}; + + class sys_var_character_set_database :public sys_var_character_set { public: diff --git a/sql/slave.cc b/sql/slave.cc index 2f95f8ee2e9..f530f2a810d 100644 --- a/sql/slave.cc +++ b/sql/slave.cc @@ -1014,6 +1014,12 @@ static int create_table_from_dump(THD* thd, MYSQL *mysql, const char* db, goto err; // mysql_parse took care of the error send thd->proc_info = "Opening master dump table"; + /* + Note: If this function starts to fail for MERGE tables, + change the next two lines to these: + tables.table= NULL; // was set by mysql_rm_table() + if (!open_n_lock_single_table(thd, &tables, TL_WRITE)) + */ tables.lock_type = TL_WRITE; if (!open_ltable(thd, &tables, TL_WRITE, 0)) { @@ -1716,7 +1722,14 @@ 
static int has_temporary_error(THD *thd) DBUG_ENTER("has_temporary_error"); if (thd->is_fatal_error) + { + DBUG_PRINT("info", ("thd->net.last_errno: %s", ER(thd->net.last_errno))); DBUG_RETURN(0); + } + + DBUG_EXECUTE_IF("all_errors_are_temporary_errors", + if (thd->net.last_errno) + thd->net.last_errno= ER_LOCK_DEADLOCK;); /* Temporary error codes: @@ -1725,7 +1738,10 @@ static int has_temporary_error(THD *thd) */ if (thd->net.last_errno == ER_LOCK_DEADLOCK || thd->net.last_errno == ER_LOCK_WAIT_TIMEOUT) + { + DBUG_PRINT("info", ("thd->net.last_errno: %s", ER(thd->net.last_errno))); DBUG_RETURN(1); + } #ifdef HAVE_NDB_BINLOG /* @@ -1796,9 +1812,6 @@ static int exec_relay_log_event(THD* thd, Relay_log_info* rli) int const type_code= ev->get_type_code(); int exec_res= 0; - /* - */ - DBUG_PRINT("exec_event",("%s(type_code: %d; server_id: %d)", ev->get_type_str(), type_code, ev->server_id)); DBUG_PRINT("info", ("thd->options: %s%s; rli->last_event_start_time: %lu", @@ -1807,7 +1820,6 @@ static int exec_relay_log_event(THD* thd, Relay_log_info* rli) rli->last_event_start_time)); - /* Execute the event to change the database and update the binary log coordinates, but first we set some data that is needed for @@ -1855,10 +1867,13 @@ static int exec_relay_log_event(THD* thd, Relay_log_info* rli) // EVENT_SKIP_NOT, "not skipped", // EVENT_SKIP_IGNORE, - "skipped because event originated from this server", + "skipped because event should be ignored", // EVENT_SKIP_COUNT "skipped because event skip counter was non-zero" }; + DBUG_PRINT("info", ("OPTION_BEGIN: %d; IN_STMT: %d", + thd->options & OPTION_BEGIN ? 
1 : 0, + rli->get_flag(Relay_log_info::IN_STMT))); DBUG_PRINT("skip_event", ("%s event was %s", ev->get_type_str(), explain[reason])); #endif @@ -1907,7 +1922,8 @@ static int exec_relay_log_event(THD* thd, Relay_log_info* rli) } if (slave_trans_retries) { - if (exec_res && has_temporary_error(thd)) + int temp_err; + if (exec_res && (temp_err= has_temporary_error(thd))) { const char *errmsg; /* @@ -1955,15 +1971,19 @@ static int exec_relay_log_event(THD* thd, Relay_log_info* rli) "the slave_transaction_retries variable.", slave_trans_retries); } - else if (!((thd->options & OPTION_BEGIN) && opt_using_transactions)) + else if (exec_res && !temp_err || + (opt_using_transactions && + rli->group_relay_log_pos == rli->event_relay_log_pos)) { /* - Only reset the retry counter if the event succeeded or - failed with a non-transient error. On a successful event, - the execution will proceed as usual; in the case of a + Only reset the retry counter if the entire group succeeded + or failed with a non-transient error. On a successful + event, the execution will proceed as usual; in the case of a non-transient error, the slave will stop with an error. */ rli->trans_retries= 0; // restart from fresh + DBUG_PRINT("info", ("Resetting retry counter, rli->trans_retries: %lu", + rli->trans_retries)); } } DBUG_RETURN(exec_res); @@ -2456,6 +2476,7 @@ pthread_handler_t handle_slave_sql(void *arg) rli->ignore_log_space_limit= 0; pthread_mutex_unlock(&rli->log_space_lock); rli->trans_retries= 0; // start from "no error" + DBUG_PRINT("info", ("rli->trans_retries: %lu", rli->trans_retries)); if (init_relay_log_pos(rli, rli->group_relay_log_name, @@ -3587,7 +3608,16 @@ static Log_event* next_event(Relay_log_info* rli) a new event and is queuing it; the false "0" will exist until SQL finishes executing the new event; it will be look abnormal only if the events have old timestamps (then you get "many", 0, "many"). - Transient phases like this can't really be fixed. 
+ + Transient phases like this can be fixed with implemeting + Heartbeat event which provides the slave the status of the + master at time the master does not have any new update to send. + Seconds_Behind_Master would be zero only when master has no + more updates in binlog for slave. The heartbeat can be sent + in a (small) fraction of slave_net_timeout. Until it's done + rli->last_master_timestamp is temporarely (for time of + waiting for the following event) reset whenever EOF is + reached. */ time_t save_timestamp= rli->last_master_timestamp; rli->last_master_timestamp= 0; diff --git a/sql/sp_head.cc b/sql/sp_head.cc index c0ea73a6c00..4b432cef5cd 100644 --- a/sql/sp_head.cc +++ b/sql/sp_head.cc @@ -102,8 +102,9 @@ sp_get_item_value(THD *thd, Item *item, String *str) case REAL_RESULT: case INT_RESULT: case DECIMAL_RESULT: - return item->val_str(str); - + if (item->field_type() != MYSQL_TYPE_BIT) + return item->val_str(str); + else {/* Bit type is handled as binary string */} case STRING_RESULT: { String *result= item->val_str(str); @@ -465,14 +466,16 @@ check_routine_name(LEX_STRING *ident) */ void * -sp_head::operator new(size_t size) +sp_head::operator new(size_t size) throw() { DBUG_ENTER("sp_head::operator new"); MEM_ROOT own_root; sp_head *sp; - init_alloc_root(&own_root, MEM_ROOT_BLOCK_SIZE, MEM_ROOT_PREALLOC); + init_sql_alloc(&own_root, MEM_ROOT_BLOCK_SIZE, MEM_ROOT_PREALLOC); sp= (sp_head *) alloc_root(&own_root, size); + if (sp == NULL) + return NULL; sp->main_mem_root= own_root; DBUG_PRINT("info", ("mem_root 0x%lx", (ulong) &sp->mem_root)); DBUG_RETURN(sp); @@ -483,6 +486,10 @@ sp_head::operator delete(void *ptr, size_t size) { DBUG_ENTER("sp_head::operator delete"); MEM_ROOT own_root; + + if (ptr == NULL) + DBUG_VOID_RETURN; + sp_head *sp= (sp_head *) ptr; /* Make a copy of main_mem_root as free_root will free the sp */ @@ -536,6 +543,9 @@ sp_head::init(LEX *lex) lex->spcont= m_pcont= new sp_pcontext(); + if (!lex->spcont) + DBUG_VOID_RETURN; + /* 
Altough trg_table_fields list is used only in triggers we init for all types of stored procedures to simplify reset_lex()/restore_lex() code. @@ -1069,7 +1079,7 @@ sp_head::execute(THD *thd) DBUG_RETURN(TRUE); /* init per-instruction memroot */ - init_alloc_root(&execute_mem_root, MEM_ROOT_BLOCK_SIZE, 0); + init_sql_alloc(&execute_mem_root, MEM_ROOT_BLOCK_SIZE, 0); DBUG_ASSERT(!(m_flags & IS_INVOKED)); m_flags|= IS_INVOKED; @@ -1961,16 +1971,29 @@ sp_head::execute_procedure(THD *thd, List<Item> *args) } -// Reset lex during parsing, before we parse a sub statement. -void +/** + @brief Reset lex during parsing, before we parse a sub statement. + + @param thd Thread handler. + + @return Error state + @retval true An error occurred. + @retval false Success. +*/ + +bool sp_head::reset_lex(THD *thd) { DBUG_ENTER("sp_head::reset_lex"); LEX *sublex; LEX *oldlex= thd->lex; + sublex= new (thd->mem_root)st_lex_local; + if (sublex == 0) + DBUG_RETURN(TRUE); + + thd->lex= sublex; (void)m_lex.push_front(oldlex); - thd->lex= sublex= new st_lex; /* Reset most stuff. */ lex_start(thd); @@ -1991,7 +2014,7 @@ sp_head::reset_lex(THD *thd) sublex->interval_list.empty(); sublex->type= 0; - DBUG_VOID_RETURN; + DBUG_RETURN(FALSE); } // Restore lex during parsing, after we have parsed a sub statement. 
@@ -3856,7 +3879,7 @@ sp_add_to_query_tables(THD *thd, LEX *lex, if (!(table= (TABLE_LIST *)thd->calloc(sizeof(TABLE_LIST)))) { - my_error(ER_OUTOFMEMORY, MYF(0), sizeof(TABLE_LIST)); + thd->fatal_error(); return NULL; } table->db_length= strlen(db); diff --git a/sql/sp_head.h b/sql/sp_head.h index d74170534bd..86a77a434ff 100644 --- a/sql/sp_head.h +++ b/sql/sp_head.h @@ -260,10 +260,10 @@ public: Security_context m_security_ctx; static void * - operator new(size_t size); + operator new(size_t size) throw (); static void - operator delete(void *ptr, size_t size); + operator delete(void *ptr, size_t size) throw (); sp_head(); @@ -326,7 +326,7 @@ public: } // Resets lex in 'thd' and keeps a copy of the old one. - void + bool reset_lex(THD *thd); // Restores lex in 'thd' from our copy, but keeps some status from the diff --git a/sql/sql_acl.cc b/sql/sql_acl.cc index 311b76c6149..58ba8c28fb7 100644 --- a/sql/sql_acl.cc +++ b/sql/sql_acl.cc @@ -365,8 +365,8 @@ static my_bool acl_load(THD *thd, TABLE_LIST *tables) #endif VOID(push_dynamic(&acl_hosts,(uchar*) &host)); } - qsort((uchar*) dynamic_element(&acl_hosts,0,ACL_HOST*),acl_hosts.elements, - sizeof(ACL_HOST),(qsort_cmp) acl_compare); + my_qsort((uchar*) dynamic_element(&acl_hosts,0,ACL_HOST*),acl_hosts.elements, + sizeof(ACL_HOST),(qsort_cmp) acl_compare); end_read_record(&read_record_info); freeze_size(&acl_hosts); @@ -553,8 +553,8 @@ static my_bool acl_load(THD *thd, TABLE_LIST *tables) allow_all_hosts=1; // Anyone can connect } } - qsort((uchar*) dynamic_element(&acl_users,0,ACL_USER*),acl_users.elements, - sizeof(ACL_USER),(qsort_cmp) acl_compare); + my_qsort((uchar*) dynamic_element(&acl_users,0,ACL_USER*),acl_users.elements, + sizeof(ACL_USER),(qsort_cmp) acl_compare); end_read_record(&read_record_info); freeze_size(&acl_users); @@ -612,8 +612,8 @@ static my_bool acl_load(THD *thd, TABLE_LIST *tables) #endif VOID(push_dynamic(&acl_dbs,(uchar*) &db)); } - qsort((uchar*) 
dynamic_element(&acl_dbs,0,ACL_DB*),acl_dbs.elements, - sizeof(ACL_DB),(qsort_cmp) acl_compare); + my_qsort((uchar*) dynamic_element(&acl_dbs,0,ACL_DB*),acl_dbs.elements, + sizeof(ACL_DB),(qsort_cmp) acl_compare); end_read_record(&read_record_info); freeze_size(&acl_dbs); init_check_host(); @@ -1243,8 +1243,8 @@ static void acl_insert_user(const char *user, const char *host, if (!acl_user.host.hostname || (acl_user.host.hostname[0] == wild_many && !acl_user.host.hostname[1])) allow_all_hosts=1; // Anyone can connect /* purecov: tested */ - qsort((uchar*) dynamic_element(&acl_users,0,ACL_USER*),acl_users.elements, - sizeof(ACL_USER),(qsort_cmp) acl_compare); + my_qsort((uchar*) dynamic_element(&acl_users,0,ACL_USER*),acl_users.elements, + sizeof(ACL_USER),(qsort_cmp) acl_compare); /* Rebuild 'acl_check_hosts' since 'acl_users' has been modified */ rebuild_check_host(); @@ -1306,8 +1306,8 @@ static void acl_insert_db(const char *user, const char *host, const char *db, acl_db.access=privileges; acl_db.sort=get_sort(3,acl_db.host.hostname,acl_db.db,acl_db.user); VOID(push_dynamic(&acl_dbs,(uchar*) &acl_db)); - qsort((uchar*) dynamic_element(&acl_dbs,0,ACL_DB*),acl_dbs.elements, - sizeof(ACL_DB),(qsort_cmp) acl_compare); + my_qsort((uchar*) dynamic_element(&acl_dbs,0,ACL_DB*),acl_dbs.elements, + sizeof(ACL_DB),(qsort_cmp) acl_compare); } @@ -3472,16 +3472,13 @@ void grant_free(void) } -/* - Initialize structures responsible for table/column-level privilege checking - and load information for them from tables in the 'mysql' database. - - SYNOPSIS - grant_init() +/** + @brief Initialize structures responsible for table/column-level privilege + checking and load information for them from tables in the 'mysql' database. - RETURN VALUES - 0 ok - 1 Could not initialize grant's + @return Error status + @retval 0 OK + @retval 1 Could not initialize grant subsystem. 
*/ my_bool grant_init() @@ -3503,96 +3500,144 @@ my_bool grant_init() } -/* - Initialize structures responsible for table/column-level privilege - checking and load information about grants from open privilege tables. +/** + @brief Helper function to grant_reload_procs_priv - SYNOPSIS - grant_load() - thd Current thread - tables List containing open "mysql.tables_priv" and - "mysql.columns_priv" tables. + Reads the procs_priv table into memory hash. - RETURN VALUES - FALSE - success - TRUE - error + @param table A pointer to the procs_priv table structure. + + @see grant_reload + @see grant_reload_procs_priv + + @return Error state + @retval TRUE An error occurred + @retval FALSE Success */ -static my_bool grant_load(TABLE_LIST *tables) +static my_bool grant_load_procs_priv(TABLE *p_table) { MEM_ROOT *memex_ptr; my_bool return_val= 1; - TABLE *t_table, *c_table, *p_table; bool check_no_resolve= specialflag & SPECIAL_NO_RESOLVE; MEM_ROOT **save_mem_root_ptr= my_pthread_getspecific_ptr(MEM_ROOT**, THR_MALLOC); DBUG_ENTER("grant_load"); - - (void) hash_init(&column_priv_hash,system_charset_info, - 0,0,0, (hash_get_key) get_grant_table, - (hash_free_key) free_grant_table,0); (void) hash_init(&proc_priv_hash,system_charset_info, - 0,0,0, (hash_get_key) get_grant_table, - 0,0); + 0,0,0, (hash_get_key) get_grant_table, + 0,0); (void) hash_init(&func_priv_hash,system_charset_info, - 0,0,0, (hash_get_key) get_grant_table, - 0,0); - init_sql_alloc(&memex, ACL_ALLOC_BLOCK_SIZE, 0); - - t_table = tables[0].table; - c_table = tables[1].table; - p_table= tables[2].table; - t_table->file->ha_index_init(0, 1); + 0,0,0, (hash_get_key) get_grant_table, + 0,0); p_table->file->ha_index_init(0, 1); - t_table->use_all_columns(); - c_table->use_all_columns(); p_table->use_all_columns(); - if (!t_table->file->index_first(t_table->record[0])) + + if (!p_table->file->index_first(p_table->record[0])) { memex_ptr= &memex; my_pthread_setspecific_ptr(THR_MALLOC, &memex_ptr); do { - GRANT_TABLE 
*mem_check; - if (!(mem_check=new (memex_ptr) GRANT_TABLE(t_table,c_table))) + GRANT_NAME *mem_check; + HASH *hash; + if (!(mem_check=new (memex_ptr) GRANT_NAME(p_table))) { - /* This could only happen if we are out memory */ - goto end_unlock; + /* This could only happen if we are out memory */ + goto end_unlock; } if (check_no_resolve) { if (hostname_requires_resolving(mem_check->host.hostname)) { - sql_print_warning("'tables_priv' entry '%s %s@%s' " + sql_print_warning("'procs_priv' entry '%s %s@%s' " "ignored in --skip-name-resolve mode.", - mem_check->tname, - mem_check->user ? mem_check->user : "", + mem_check->tname, mem_check->user, mem_check->host.hostname ? mem_check->host.hostname : ""); - continue; - } + continue; + } + } + if (p_table->field[4]->val_int() == TYPE_ENUM_PROCEDURE) + { + hash= &proc_priv_hash; + } + else + if (p_table->field[4]->val_int() == TYPE_ENUM_FUNCTION) + { + hash= &func_priv_hash; + } + else + { + sql_print_warning("'procs_priv' entry '%s' " + "ignored, bad routine type", + mem_check->tname); + continue; } + mem_check->privs= fix_rights_for_procedure(mem_check->privs); if (! mem_check->ok()) - delete mem_check; - else if (my_hash_insert(&column_priv_hash,(uchar*) mem_check)) + delete mem_check; + else if (my_hash_insert(hash, (uchar*) mem_check)) { - delete mem_check; - goto end_unlock; + delete mem_check; + goto end_unlock; } } - while (!t_table->file->index_next(t_table->record[0])); + while (!p_table->file->index_next(p_table->record[0])); } - if (!p_table->file->index_first(p_table->record[0])) + /* Return ok */ + return_val= 0; + +end_unlock: + p_table->file->ha_index_end(); + my_pthread_setspecific_ptr(THR_MALLOC, save_mem_root_ptr); + DBUG_RETURN(return_val); +} + + +/** + @brief Initialize structures responsible for table/column-level privilege + checking and load information about grants from open privilege tables. 
+ + @param thd Current thread + @param tables List containing open "mysql.tables_priv" and + "mysql.columns_priv" tables. + + @see grant_reload + + @return Error state + @retval FALSE Success + @retval TRUE Error +*/ + +static my_bool grant_load(TABLE_LIST *tables) +{ + MEM_ROOT *memex_ptr; + my_bool return_val= 1; + TABLE *t_table= 0, *c_table= 0; + bool check_no_resolve= specialflag & SPECIAL_NO_RESOLVE; + MEM_ROOT **save_mem_root_ptr= my_pthread_getspecific_ptr(MEM_ROOT**, + THR_MALLOC); + DBUG_ENTER("grant_load"); + (void) hash_init(&column_priv_hash,system_charset_info, + 0,0,0, (hash_get_key) get_grant_table, + (hash_free_key) free_grant_table,0); + + t_table = tables[0].table; + c_table = tables[1].table; + t_table->file->ha_index_init(0, 1); + t_table->use_all_columns(); + c_table->use_all_columns(); + + if (!t_table->file->index_first(t_table->record[0])) { memex_ptr= &memex; my_pthread_setspecific_ptr(THR_MALLOC, &memex_ptr); do { - GRANT_NAME *mem_check; - HASH *hash; - if (!(mem_check=new (&memex) GRANT_NAME(p_table))) + GRANT_TABLE *mem_check; + if (!(mem_check=new (memex_ptr) GRANT_TABLE(t_table,c_table))) { /* This could only happen if we are out memory */ goto end_unlock; @@ -3602,74 +3647,111 @@ static my_bool grant_load(TABLE_LIST *tables) { if (hostname_requires_resolving(mem_check->host.hostname)) { - sql_print_warning("'procs_priv' entry '%s %s@%s' " + sql_print_warning("'tables_priv' entry '%s %s@%s' " "ignored in --skip-name-resolve mode.", - mem_check->tname, mem_check->user, + mem_check->tname, + mem_check->user ? mem_check->user : "", mem_check->host.hostname ? 
mem_check->host.hostname : ""); continue; } } - if (p_table->field[4]->val_int() == TYPE_ENUM_PROCEDURE) - { - hash= &proc_priv_hash; - } - else - if (p_table->field[4]->val_int() == TYPE_ENUM_FUNCTION) - { - hash= &func_priv_hash; - } - else - { - sql_print_warning("'procs_priv' entry '%s' " - "ignored, bad routine type", - mem_check->tname); - continue; - } - mem_check->privs= fix_rights_for_procedure(mem_check->privs); if (! mem_check->ok()) delete mem_check; - else if (my_hash_insert(hash, (uchar*) mem_check)) + else if (my_hash_insert(&column_priv_hash,(uchar*) mem_check)) { delete mem_check; goto end_unlock; } } - while (!p_table->file->index_next(p_table->record[0])); + while (!t_table->file->index_next(t_table->record[0])); } + return_val=0; // Return ok end_unlock: t_table->file->ha_index_end(); - p_table->file->ha_index_end(); my_pthread_setspecific_ptr(THR_MALLOC, save_mem_root_ptr); DBUG_RETURN(return_val); } -/* - Reload information about table and column level privileges if possible. +/** + @brief Helper function to grant_reload. Reloads procs_priv table is it + exists. - SYNOPSIS - grant_reload() - thd Current thread + @param thd A pointer to the thread handler object. - NOTES - Locked tables are checked by acl_reload() and doesn't have to be checked - in this call. - This function is also used for initialization of structures responsible - for table/column-level privilege checking. + @see grant_reload - RETURN VALUE - FALSE Success - TRUE Error + @return Error state + @retval FALSE Success + @retval TRUE An error has occurred. 
+*/ + +static my_bool grant_reload_procs_priv(THD *thd) +{ + HASH old_proc_priv_hash, old_func_priv_hash; + TABLE_LIST table; + my_bool return_val= FALSE; + DBUG_ENTER("grant_reload_procs_priv"); + + bzero((char*) &table, sizeof(table)); + table.alias= table.table_name= (char*) "procs_priv"; + table.db= (char *) "mysql"; + table.lock_type= TL_READ; + + if (simple_open_n_lock_tables(thd, &table)) + { + close_thread_tables(thd); + DBUG_RETURN(TRUE); + } + + /* Save a copy of the current hash if we need to undo the grant load */ + old_proc_priv_hash= proc_priv_hash; + old_func_priv_hash= func_priv_hash; + + rw_wrlock(&LOCK_grant); + if ((return_val= grant_load_procs_priv(table.table))) + { + /* Error; Reverting to old hash */ + DBUG_PRINT("error",("Reverting to old privileges")); + grant_free(); + proc_priv_hash= old_proc_priv_hash; + func_priv_hash= old_func_priv_hash; + } + else + { + hash_free(&old_proc_priv_hash); + hash_free(&old_func_priv_hash); + } + rw_unlock(&LOCK_grant); + + close_thread_tables(thd); + DBUG_RETURN(return_val); +} + + +/** + @brief Reload information about table and column level privileges if possible + + @param thd Current thread + + Locked tables are checked by acl_reload() and doesn't have to be checked + in this call. + This function is also used for initialization of structures responsible + for table/column-level privilege checking. 
+ + @return Error state + @retval FALSE Success + @retval TRUE Error */ my_bool grant_reload(THD *thd) { - TABLE_LIST tables[3]; - HASH old_column_priv_hash, old_proc_priv_hash, old_func_priv_hash; + TABLE_LIST tables[2]; + HASH old_column_priv_hash; MEM_ROOT old_mem; my_bool return_val= 1; DBUG_ENTER("grant_reload"); @@ -3681,11 +3763,9 @@ my_bool grant_reload(THD *thd) bzero((char*) tables, sizeof(tables)); tables[0].alias= tables[0].table_name= (char*) "tables_priv"; tables[1].alias= tables[1].table_name= (char*) "columns_priv"; - tables[2].alias= tables[2].table_name= (char*) "procs_priv"; - tables[0].db= tables[1].db= tables[2].db= (char *) "mysql"; + tables[0].db= tables[1].db= (char *) "mysql"; tables[0].next_local= tables[0].next_global= tables+1; - tables[1].next_local= tables[1].next_global= tables+2; - tables[0].lock_type= tables[1].lock_type= tables[2].lock_type= TL_READ; + tables[0].lock_type= tables[1].lock_type= TL_READ; /* To avoid deadlocks we should obtain table locks before @@ -3695,35 +3775,45 @@ my_bool grant_reload(THD *thd) goto end; rw_wrlock(&LOCK_grant); - grant_version++; old_column_priv_hash= column_priv_hash; - old_proc_priv_hash= proc_priv_hash; - old_func_priv_hash= func_priv_hash; + + /* + Create a new memory pool but save the current memory pool to make an undo + opertion possible in case of failure. + */ old_mem= memex; + init_sql_alloc(&memex, ACL_ALLOC_BLOCK_SIZE, 0); if ((return_val= grant_load(tables))) { // Error. 
Revert to old hash DBUG_PRINT("error",("Reverting to old privileges")); grant_free(); /* purecov: deadcode */ column_priv_hash= old_column_priv_hash; /* purecov: deadcode */ - proc_priv_hash= old_proc_priv_hash; - func_priv_hash= old_func_priv_hash; memex= old_mem; /* purecov: deadcode */ } else { hash_free(&old_column_priv_hash); - hash_free(&old_proc_priv_hash); - hash_free(&old_func_priv_hash); free_root(&old_mem,MYF(0)); } rw_unlock(&LOCK_grant); -end: close_thread_tables(thd); + + /* + It is ok failing to load procs_priv table because we may be + working with 4.1 privilege tables. + */ + if (grant_reload_procs_priv(thd)) + my_error(ER_CANNOT_LOAD_FROM_TABLE, MYF(0), "mysql.procs_priv"); + + rw_wrlock(&LOCK_grant); + grant_version++; + rw_unlock(&LOCK_grant); + +end: DBUG_RETURN(return_val); } - /**************************************************************************** Check table level grants diff --git a/sql/sql_array.h b/sql/sql_array.h index ab6fdd0c5c0..e1b22921519 100644 --- a/sql/sql_array.h +++ b/sql/sql_array.h @@ -62,7 +62,7 @@ public: void sort(CMP_FUNC cmp_func) { - qsort(array.buffer, array.elements, sizeof(Elem), (qsort_cmp)cmp_func); + my_qsort(array.buffer, array.elements, sizeof(Elem), (qsort_cmp)cmp_func); } }; diff --git a/sql/sql_base.cc b/sql/sql_base.cc index 6896ccd1b8c..4474671eadf 100644 --- a/sql/sql_base.cc +++ b/sql/sql_base.cc @@ -87,7 +87,6 @@ bool Prelock_error_handler::safely_trapped_errors() @defgroup Data_Dictionary Data Dictionary @{ */ - TABLE *unused_tables; /* Used by mysql_test */ HASH open_cache; /* Used by mysql_test */ static HASH table_def_cache; @@ -684,6 +683,9 @@ TABLE_SHARE *get_cached_table_share(const char *db, const char *table_name) to open the table thd->killed will be set if we run out of memory + + If closing a MERGE child, the calling function has to take care for + closing the parent too, if necessary. 
*/ @@ -712,6 +714,12 @@ void close_handle_and_leave_table_as_lock(TABLE *table) share->tmp_table= INTERNAL_TMP_TABLE; // for intern_close_table() } + /* + When closing a MERGE parent or child table, detach the children first. + Do not clear child table references to allow for reopen. + */ + if (table->child_l || table->parent) + detach_merge_children(table, FALSE); table->file->close(); table->db_stat= 0; // Mark file closed release_table_share(table->s, RELEASE_NORMAL); @@ -812,6 +820,10 @@ OPEN_TABLE_LIST *list_open_tables(THD *thd, const char *db, const char *wild) void intern_close_table(TABLE *table) { // Free all structures DBUG_ENTER("intern_close_table"); + DBUG_PRINT("tcache", ("table: '%s'.'%s' 0x%lx", + table->s ? table->s->db.str : "?", + table->s ? table->s->table_name.str : "?", + (long) table)); free_io_cache(table); delete table->triggers; @@ -835,6 +847,9 @@ static void free_cache_entry(TABLE *table) { DBUG_ENTER("free_cache_entry"); + /* Assert that MERGE children are not attached before final close. */ + DBUG_ASSERT(!table->is_children_attached()); + intern_close_table(table); if (!table->in_use) { @@ -901,6 +916,54 @@ bool close_cached_tables(THD *thd, bool if_wait_for_refresh, pthread_mutex_lock(&oldest_unused_share->mutex); VOID(hash_delete(&table_def_cache, (uchar*) oldest_unused_share)); } + DBUG_PRINT("tcache", ("incremented global refresh_version to: %lu", + refresh_version)); + if (if_wait_for_refresh) + { + /* + Other threads could wait in a loop in open_and_lock_tables(), + trying to lock one or more of our tables. + + If they wait for the locks in thr_multi_lock(), their lock + request is aborted. They loop in open_and_lock_tables() and + enter open_table(). Here they notice the table is refreshed and + wait for COND_refresh. Then they loop again in + open_and_lock_tables() and this time open_table() succeeds. 
At + this moment, if we (the FLUSH TABLES thread) are scheduled and + on another FLUSH TABLES enter close_cached_tables(), they could + awake while we sleep below, waiting for other threads (us) to + close their open tables. If this happens, the other threads + would find the tables unlocked. They would get the locks, one + after the other, and could do their destructive work. This is an + issue if we have LOCK TABLES in effect. + + The problem is that the other threads passed all checks in + open_table() before we refresh the table. + + The fix for this problem is to set some_tables_deleted for all + threads with open tables. These threads can still get their + locks, but will immediately release them again after checking + this variable. They will then loop in open_and_lock_tables() + again. There they will wait until we update all tables version + below. + + Setting some_tables_deleted is done by remove_table_from_cache() + in the other branch. + + In other words (reviewer suggestion): You need this setting of + some_tables_deleted for the case when table was opened and all + related checks were passed before incrementing refresh_version + (which you already have) but attempt to lock the table happened + after the call to close_old_data_files() i.e. after removal of + current thread locks. 
+ */ + for (uint idx=0 ; idx < open_cache.records ; idx++) + { + TABLE *table=(TABLE*) hash_element(&open_cache,idx); + if (table->in_use) + table->in_use->some_tables_deleted= 1; + } + } } else { @@ -929,8 +992,8 @@ bool close_cached_tables(THD *thd, bool if_wait_for_refresh, thd->proc_info="Flushing tables"; close_old_data_files(thd,thd->open_tables,1,1); - mysql_ha_flush(thd, tables, MYSQL_HA_REOPEN_ON_USAGE | MYSQL_HA_FLUSH_ALL, - TRUE); + mysql_ha_flush(thd); + bool found=1; /* Wait until all threads has closed all the tables we had locked */ DBUG_PRINT("info", @@ -1073,6 +1136,14 @@ static void mark_temp_tables_as_free_for_reuse(THD *thd) { table->query_id= 0; table->file->ha_reset(); + /* + Detach temporary MERGE children from temporary parent to allow new + attach at next open. Do not do the detach, if close_thread_tables() + is called from a sub-statement. The temporary table might still be + used in the top-level statement. + */ + if (table->child_l || table->parent) + detach_merge_children(table, TRUE); } } } @@ -1170,9 +1241,17 @@ static void close_open_tables(THD *thd) void close_thread_tables(THD *thd) { + TABLE *table; prelocked_mode_type prelocked_mode= thd->prelocked_mode; DBUG_ENTER("close_thread_tables"); +#ifdef EXTRA_DEBUG + DBUG_PRINT("tcache", ("open tables:")); + for (table= thd->open_tables; table; table= table->next) + DBUG_PRINT("tcache", ("table: '%s'.'%s' 0x%lx", table->s->db.str, + table->s->table_name.str, (long) table)); +#endif + /* We are assuming here that thd->derived_tables contains ONLY derived tables for this substatement. i.e. 
instead of approach which uses @@ -1186,7 +1265,7 @@ void close_thread_tables(THD *thd) */ if (thd->derived_tables) { - TABLE *table, *next; + TABLE *next; /* Close all derived tables generated in queries like SELECT * FROM (SELECT * FROM t1) @@ -1266,6 +1345,13 @@ void close_thread_tables(THD *thd) if (!thd->active_transaction()) thd->transaction.xid_state.xid.null(); + /* + Note that we need to hold LOCK_open while changing the + open_tables list. Another thread may work on it. + (See: remove_table_from_cache(), mysql_wait_completed_table()) + Closing a MERGE child before the parent would be fatal if the + other thread tries to abort the MERGE lock in between. + */ if (thd->open_tables) close_open_tables(thd); @@ -1292,8 +1378,17 @@ bool close_thread_table(THD *thd, TABLE **table_ptr) DBUG_ENTER("close_thread_table"); DBUG_ASSERT(table->key_read == 0); DBUG_ASSERT(!table->file || table->file->inited == handler::NONE); + DBUG_PRINT("tcache", ("table: '%s'.'%s' 0x%lx", table->s->db.str, + table->s->table_name.str, (long) table)); *table_ptr=table->next; + /* + When closing a MERGE parent or child table, detach the children first. + Clear child table references to force new assignment at next open. + */ + if (table->child_l || table->parent) + detach_merge_children(table, TRUE); + if (table->needs_reopen_or_name_lock() || thd->version != refresh_version || !table->db_stat) { @@ -1308,6 +1403,9 @@ bool close_thread_table(THD *thd, TABLE **table_ptr) */ DBUG_ASSERT(!table->open_placeholder); + /* Assert that MERGE children are not attached in unused_tables. 
*/ + DBUG_ASSERT(!table->is_children_attached()); + /* Free memory and reset for next loop */ table->file->ha_reset(); table->in_use=0; @@ -1729,6 +1827,8 @@ int drop_temporary_table(THD *thd, TABLE_LIST *table_list) { TABLE *table; DBUG_ENTER("drop_temporary_table"); + DBUG_PRINT("tmptable", ("closing table: '%s'.'%s'", + table_list->db, table_list->table_name)); if (!(table= find_temporary_table(thd, table_list))) DBUG_RETURN(1); @@ -1756,6 +1856,24 @@ int drop_temporary_table(THD *thd, TABLE_LIST *table_list) void close_temporary_table(THD *thd, TABLE *table, bool free_share, bool delete_table) { + DBUG_ENTER("close_temporary_table"); + DBUG_PRINT("tmptable", ("closing table: '%s'.'%s' 0x%lx alias: '%s'", + table->s->db.str, table->s->table_name.str, + (long) table, table->alias)); + + /* + When closing a MERGE parent or child table, detach the children + first. Clear child table references as MERGE table cannot be + reopened after final close of one of its tables. + + This is necessary here because it is sometimes called with attached + tables and without prior close_thread_tables(). E.g. in + mysql_alter_table(), mysql_rm_table_part2(), mysql_truncate(), + drop_open_table(). 
+ */ + if (table->child_l || table->parent) + detach_merge_children(table, TRUE); + if (table->prev) { table->prev->next= table->next; @@ -1782,6 +1900,7 @@ void close_temporary_table(THD *thd, TABLE *table, slave_open_temp_tables--; } close_temporary(table, free_share, delete_table); + DBUG_VOID_RETURN; } @@ -1797,6 +1916,8 @@ void close_temporary(TABLE *table, bool free_share, bool delete_table) { handlerton *table_type= table->s->db_type(); DBUG_ENTER("close_temporary"); + DBUG_PRINT("tmptable", ("closing table: '%s'.'%s'", + table->s->db.str, table->s->table_name.str)); free_io_cache(table); closefrm(table, 0); @@ -1843,6 +1964,9 @@ bool rename_temporary_table(THD* thd, TABLE *table, const char *db, static void relink_unused(TABLE *table) { + /* Assert that MERGE children are not attached in unused_tables. */ + DBUG_ASSERT(!table->is_children_attached()); + if (table != unused_tables) { table->prev->next=table->next; /* Remove from unused list */ @@ -1858,6 +1982,77 @@ static void relink_unused(TABLE *table) /** + @brief Prepare an open merge table for close. + + @param[in] thd thread context + @param[in] table table to prepare + @param[in,out] prev_pp pointer to pointer of previous table + + @detail + If the table is a MERGE parent, just detach the children. + If the table is a MERGE child, close the parent (incl. detach). +*/ + +static void unlink_open_merge(THD *thd, TABLE *table, TABLE ***prev_pp) +{ + DBUG_ENTER("unlink_open_merge"); + + if (table->parent) + { + /* + If MERGE child, close parent too. Closing includes detaching. + + This is used for example in ALTER TABLE t1 RENAME TO t5 under + LOCK TABLES where t1 is a MERGE child: + CREATE TABLE t1 (c1 INT); + CREATE TABLE t2 (c1 INT) ENGINE=MRG_MYISAM UNION=(t1); + LOCK TABLES t1 WRITE, t2 WRITE; + ALTER TABLE t1 RENAME TO t5; + */ + TABLE *parent= table->parent; + TABLE **prv_p; + + /* Find parent in open_tables list. 
*/ + for (prv_p= &thd->open_tables; + *prv_p && (*prv_p != parent); + prv_p= &(*prv_p)->next) {} + if (*prv_p) + { + /* Special treatment required if child follows parent in list. */ + if (*prev_pp == &parent->next) + *prev_pp= prv_p; + /* + Remove parent from open_tables list and close it. + This includes detaching and hence clearing parent references. + */ + close_thread_table(thd, prv_p); + } + } + else if (table->child_l) + { + /* + When closing a MERGE parent, detach the children first. It is + not necessary to clear the child or parent table reference of + this table because the TABLE is freed. But we need to clear + the child or parent references of the other belonging tables + so that they cannot be moved into the unused_tables chain with + these pointers set. + + This is used for example in ALTER TABLE t2 RENAME TO t5 under + LOCK TABLES where t2 is a MERGE parent: + CREATE TABLE t1 (c1 INT); + CREATE TABLE t2 (c1 INT) ENGINE=MRG_MYISAM UNION=(t1); + LOCK TABLES t1 WRITE, t2 WRITE; + ALTER TABLE t2 RENAME TO t5; + */ + detach_merge_children(table, TRUE); + } + + DBUG_VOID_RETURN; +} + + +/** @brief Remove all instances of table from thread's open list and table cache. @@ -1868,7 +2063,7 @@ static void relink_unused(TABLE *table) FALSE - otherwise @note When unlock parameter is FALSE or current thread doesn't have - any tables locked with LOCK TABLES tables are assumed to be + any tables locked with LOCK TABLES, tables are assumed to be not locked (for example already unlocked). 
*/ @@ -1876,31 +2071,45 @@ void unlink_open_table(THD *thd, TABLE *find, bool unlock) { char key[MAX_DBKEY_LENGTH]; uint key_length= find->s->table_cache_key.length; - TABLE *list, **prev, *next; + TABLE *list, **prev; DBUG_ENTER("unlink_open_table"); safe_mutex_assert_owner(&LOCK_open); - list= thd->open_tables; - prev= &thd->open_tables; memcpy(key, find->s->table_cache_key.str, key_length); - for (; list ; list=next) + /* + Note that we need to hold LOCK_open while changing the + open_tables list. Another thread may work on it. + (See: remove_table_from_cache(), mysql_wait_completed_table()) + Closing a MERGE child before the parent would be fatal if the + other thread tries to abort the MERGE lock in between. + */ + for (prev= &thd->open_tables; *prev; ) { - next=list->next; + list= *prev; + if (list->s->table_cache_key.length == key_length && !memcmp(list->s->table_cache_key.str, key, key_length)) { if (unlock && thd->locked_tables) - mysql_lock_remove(thd, thd->locked_tables, list, TRUE); + mysql_lock_remove(thd, thd->locked_tables, + list->parent ? list->parent : list, TRUE); + + /* Prepare MERGE table for close. Close parent if necessary. */ + unlink_open_merge(thd, list, &prev); + + /* Remove table from open_tables list. */ + *prev= list->next; + /* Close table. */ VOID(hash_delete(&open_cache,(uchar*) list)); // Close table } else { - *prev=list; // put in use list + /* Step to next entry in open_tables list. */ prev= &list->next; } } - *prev=0; + // Notify any 'refresh' threads broadcast_refresh(); DBUG_VOID_RETURN; @@ -1990,6 +2199,41 @@ void wait_for_condition(THD *thd, pthread_mutex_t *mutex, pthread_cond_t *cond) } +/** + Exclusively name-lock a table that is already write-locked by the + current thread. + + @param thd current thread context + @param tables table list containing one table to open. + + @return FALSE on success, TRUE otherwise. 
+*/ + +bool name_lock_locked_table(THD *thd, TABLE_LIST *tables) +{ + DBUG_ENTER("name_lock_locked_table"); + + /* Under LOCK TABLES we must only accept write locked tables. */ + tables->table= find_locked_table(thd, tables->db, tables->table_name); + + if (!tables->table) + my_error(ER_TABLE_NOT_LOCKED, MYF(0), tables->alias); + else if (tables->table->reginfo.lock_type < TL_WRITE_LOW_PRIORITY) + my_error(ER_TABLE_NOT_LOCKED_FOR_WRITE, MYF(0), tables->alias); + else + { + /* + Ensures that table is opened only by this thread and that no + other statement will open this table. + */ + wait_while_table_is_used(thd, tables->table, HA_EXTRA_FORCE_REOPEN); + DBUG_RETURN(FALSE); + } + + DBUG_RETURN(TRUE); +} + + /* Open table which is already name-locked by this thread. @@ -2383,9 +2627,14 @@ TABLE *open_table(THD *thd, TABLE_LIST *table_list, MEM_ROOT *mem_root, table->s->table_name.str); DBUG_RETURN(0); } + /* + When looking for a usable TABLE, ignore MERGE children, as they + belong to their parent and cannot be used explicitly. + */ if (!my_strcasecmp(system_charset_info, table->alias, alias) && table->query_id != thd->query_id && /* skip tables already used */ - !(thd->prelocked_mode && table->query_id)) + !(thd->prelocked_mode && table->query_id) && + !table->parent) { int distance= ((int) table->reginfo.lock_type - (int) table_list->lock_type); @@ -2516,7 +2765,7 @@ TABLE *open_table(THD *thd, TABLE_LIST *table_list, MEM_ROOT *mem_root, deadlock may occur. */ if (thd->handler_tables) - mysql_ha_flush(thd, (TABLE_LIST*) NULL, MYSQL_HA_REOPEN_ON_USAGE, TRUE); + mysql_ha_flush(thd); /* Actually try to find the table in the open_cache. @@ -2534,6 +2783,8 @@ TABLE *open_table(THD *thd, TABLE_LIST *table_list, MEM_ROOT *mem_root, table= (TABLE*) hash_next(&open_cache, (uchar*) key, key_length, &state)) { + DBUG_PRINT("tcache", ("in_use table: '%s'.'%s' 0x%lx", table->s->db.str, + table->s->table_name.str, (long) table)); /* Here we flush tables marked for flush. 
Normally, table->s->version contains the value of @@ -2622,6 +2873,8 @@ TABLE *open_table(THD *thd, TABLE_LIST *table_list, MEM_ROOT *mem_root, } if (table) { + DBUG_PRINT("tcache", ("unused table: '%s'.'%s' 0x%lx", table->s->db.str, + table->s->table_name.str, (long) table)); /* Unlink the table from "unused_tables" list. */ if (table == unused_tables) { // First unused @@ -2637,6 +2890,7 @@ TABLE *open_table(THD *thd, TABLE_LIST *table_list, MEM_ROOT *mem_root, { /* Insert a new TABLE instance into the open cache */ int error; + DBUG_PRINT("tcache", ("opening new table")); /* Free cache if too big */ while (open_cache.records > table_cache_size && unused_tables) VOID(hash_delete(&open_cache,(uchar*) unused_tables)); /* purecov: tested */ @@ -2703,7 +2957,9 @@ TABLE *open_table(THD *thd, TABLE_LIST *table_list, MEM_ROOT *mem_root, VOID(pthread_mutex_unlock(&LOCK_open)); DBUG_RETURN(0); // VIEW } - DBUG_PRINT("info", ("inserting table 0x%lx into the cache", (long) table)); + DBUG_PRINT("info", ("inserting table '%s'.'%s' 0x%lx into the cache", + table->s->db.str, table->s->table_name.str, + (long) table)); VOID(my_hash_insert(&open_cache,(uchar*) table)); } @@ -2793,9 +3049,12 @@ bool reopen_table(TABLE *table) TABLE_LIST table_list; THD *thd= table->in_use; DBUG_ENTER("reopen_table"); + DBUG_PRINT("tcache", ("table: '%s'.'%s' 0x%lx", table->s->db.str, + table->s->table_name.str, (long) table)); DBUG_ASSERT(table->s->ref_count == 0); DBUG_ASSERT(!table->sort.io_cache); + DBUG_ASSERT(!table->children_attached); #ifdef EXTRA_DEBUG if (table->db_stat) @@ -2836,6 +3095,17 @@ bool reopen_table(TABLE *table) tmp.next= table->next; tmp.prev= table->prev; + /* Preserve MERGE parent. */ + tmp.parent= table->parent; + /* Fix MERGE child list and check for unchanged union. 
*/ + if ((table->child_l || tmp.child_l) && + fix_merge_after_open(table->child_l, table->child_last_l, + tmp.child_l, tmp.child_last_l)) + { + VOID(closefrm(&tmp, 1)); // close file, free everything + goto end; + } + delete table->triggers; if (table->file) VOID(closefrm(table, 1)); // close file, free everything @@ -2857,6 +3127,11 @@ bool reopen_table(TABLE *table) } if (table->triggers) table->triggers->set_table(table); + /* + Do not attach MERGE children here. The children might be reopened + after the parent. Attach children after reopening all tables that + require reopen. See for example reopen_tables(). + */ broadcast_refresh(); error=0; @@ -2878,6 +3153,9 @@ bool reopen_table(TABLE *table) then there is only one table open and locked. This means that the function probably has to be adjusted before it can be used anywhere outside ALTER TABLE. + + @note Must not use TABLE_SHARE::table_name/db of the table being closed, + the strings are used in a loop even after the share may be freed. */ void close_data_files_and_morph_locks(THD *thd, const char *db, @@ -2909,7 +3187,22 @@ void close_data_files_and_morph_locks(THD *thd, const char *db, !strcmp(table->s->db.str, db)) { if (thd->locked_tables) - mysql_lock_remove(thd, thd->locked_tables, table, TRUE); + { + if (table->parent) + { + /* + If MERGE child, need to reopen parent too. This means that + the first child to be closed will detach all children from + the parent and close it. OTOH in most cases a MERGE table + won't have multiple children with the same db.table_name. 
+ */ + mysql_lock_remove(thd, thd->locked_tables, table->parent, TRUE); + table->parent->open_placeholder= 1; + close_handle_and_leave_table_as_lock(table->parent); + } + else + mysql_lock_remove(thd, thd->locked_tables, table, TRUE); + } table->open_placeholder= 1; close_handle_and_leave_table_as_lock(table); } @@ -2919,6 +3212,62 @@ void close_data_files_and_morph_locks(THD *thd, const char *db, /** + @brief Reattach MERGE children after reopen. + + @param[in] thd thread context + @param[in,out] err_tables_p pointer to pointer of tables in error + + @return status + @retval FALSE OK, err_tables_p unchanged + @retval TRUE Error, err_tables_p contains table(s) +*/ + +static bool reattach_merge(THD *thd, TABLE **err_tables_p) +{ + TABLE *table; + TABLE *next; + TABLE **prv_p= &thd->open_tables; + bool error= FALSE; + DBUG_ENTER("reattach_merge"); + + for (table= thd->open_tables; table; table= next) + { + next= table->next; + DBUG_PRINT("tcache", ("check table: '%s'.'%s' 0x%lx next: 0x%lx", + table->s->db.str, table->s->table_name.str, + (long) table, (long) next)); + /* Reattach children for MERGE tables with "closed data files" only. */ + if (table->child_l && !table->children_attached) + { + DBUG_PRINT("tcache", ("MERGE parent, attach children")); + if(table->file->extra(HA_EXTRA_ATTACH_CHILDREN)) + { + my_error(ER_CANT_REOPEN_TABLE, MYF(0), table->alias); + error= TRUE; + /* Remove table from open_tables. */ + *prv_p= next; + if (next) + prv_p= &next->next; + /* Stack table on error list. */ + table->next= *err_tables_p; + *err_tables_p= table; + continue; + } + else + { + table->children_attached= TRUE; + DBUG_PRINT("myrg", ("attached parent: '%s'.'%s' 0x%lx", + table->s->db.str, + table->s->table_name.str, (long) table)); + } + } + prv_p= &table->next; + } + DBUG_RETURN(error); +} + + +/** @brief Reopen all tables with closed data files. 
@param thd Thread context @@ -2942,7 +3291,9 @@ bool reopen_tables(THD *thd,bool get_locks,bool in_refresh) { TABLE *table,*next,**prev; TABLE **tables,**tables_ptr; // For locks + TABLE *err_tables= NULL; bool error=0, not_used; + bool merge_table_found= FALSE; DBUG_ENTER("reopen_tables"); if (!thd->open_tables) @@ -2951,10 +3302,15 @@ bool reopen_tables(THD *thd,bool get_locks,bool in_refresh) safe_mutex_assert_owner(&LOCK_open); if (get_locks) { - /* The ptr is checked later */ + /* + The ptr is checked later + Do not handle locks of MERGE children. + */ uint opens=0; for (table= thd->open_tables; table ; table=table->next) - opens++; + if (!table->parent) + opens++; + DBUG_PRINT("tcache", ("open tables to lock: %u", opens)); tables= (TABLE**) my_alloca(sizeof(TABLE*)*opens); } else @@ -2966,17 +3322,37 @@ bool reopen_tables(THD *thd,bool get_locks,bool in_refresh) { uint db_stat=table->db_stat; next=table->next; + DBUG_PRINT("tcache", ("open table: '%s'.'%s' 0x%lx " + "parent: 0x%lx db_stat: %u", + table->s->db.str, table->s->table_name.str, + (long) table, (long) table->parent, db_stat)); + if (table->child_l && !db_stat) + merge_table_found= TRUE; if (!tables || (!db_stat && reopen_table(table))) { my_error(ER_CANT_REOPEN_TABLE, MYF(0), table->alias); + /* + If we could not allocate 'tables', we may close open tables + here. If a MERGE table is affected, detach the children first. + It is not necessary to clear the child or parent table reference + of this table because the TABLE is freed. But we need to clear + the child or parent references of the other belonging tables so + that they cannot be moved into the unused_tables chain with + these pointers set. + */ + if (table->child_l || table->parent) + detach_merge_children(table, TRUE); VOID(hash_delete(&open_cache,(uchar*) table)); error=1; } else { + DBUG_PRINT("tcache", ("opened. 
need lock: %d", + get_locks && !db_stat && !table->parent)); *prev= table; prev= &table->next; - if (get_locks && !db_stat) + /* Do not handle locks of MERGE children. */ + if (get_locks && !db_stat && !table->parent) *tables_ptr++= table; // need new lock on this if (in_refresh) { @@ -2985,25 +3361,52 @@ bool reopen_tables(THD *thd,bool get_locks,bool in_refresh) } } } + *prev=0; + /* + When all tables are open again, we can re-attach MERGE children to + their parents. All TABLE objects are still present. + */ + DBUG_PRINT("tcache", ("re-attaching MERGE tables: %d", merge_table_found)); + if (!error && merge_table_found && reattach_merge(thd, &err_tables)) + { + while (err_tables) + { + VOID(hash_delete(&open_cache, (uchar*) err_tables)); + err_tables= err_tables->next; + } + } + DBUG_PRINT("tcache", ("open tables to lock: %u", + (uint) (tables_ptr - tables))); if (tables != tables_ptr) // Should we get back old locks { MYSQL_LOCK *lock; - /* We should always get these locks */ + /* + We should always get these locks. Anyway, we must not go into + wait_for_tables() as it tries to acquire LOCK_open, which is + already locked. + */ thd->some_tables_deleted=0; if ((lock= mysql_lock_tables(thd, tables, (uint) (tables_ptr - tables), - 0, &not_used))) + MYSQL_LOCK_NOTIFY_IF_NEED_REOPEN, &not_used))) { thd->locked_tables=mysql_lock_merge(thd->locked_tables,lock); } else + { + /* + This case should only happen if there is a bug in the reopen logic. + Need to issue error message to have a reply for the application. + Not exactly what happened though, but close enough. + */ + my_error(ER_LOCK_DEADLOCK, MYF(0)); error=1; + } } if (get_locks && tables) { my_afree((uchar*) tables); } broadcast_refresh(); - *prev=0; DBUG_RETURN(error); } @@ -3022,14 +3425,19 @@ bool reopen_tables(THD *thd,bool get_locks,bool in_refresh) @param send_refresh Should we awake waiters even if we didn't close any tables? 
*/ -void close_old_data_files(THD *thd, TABLE *table, bool morph_locks, - bool send_refresh) +static void close_old_data_files(THD *thd, TABLE *table, bool morph_locks, + bool send_refresh) { bool found= send_refresh; DBUG_ENTER("close_old_data_files"); for (; table ; table=table->next) { + DBUG_PRINT("tcache", ("checking table: '%s'.'%s' 0x%lx", + table->s->db.str, table->s->table_name.str, + (long) table)); + DBUG_PRINT("tcache", ("needs refresh: %d is open: %u", + table->needs_reopen_or_name_lock(), table->db_stat)); /* Reopen marked for flush. */ @@ -3041,13 +3449,33 @@ void close_old_data_files(THD *thd, TABLE *table, bool morph_locks, if (morph_locks) { /* - Wake up threads waiting for table-level lock on this table - so they won't sneak in when we will temporarily remove our - lock on it. This will also give them a chance to close their - instances of this table. + Forward lock handling to MERGE parent. But unlock parent + once only. */ - mysql_lock_abort(thd, table, TRUE); - mysql_lock_remove(thd, thd->locked_tables, table, TRUE); + TABLE *ulcktbl= table->parent ? table->parent : table; + if (ulcktbl->lock_count) + { + /* + Wake up threads waiting for table-level lock on this table + so they won't sneak in when we will temporarily remove our + lock on it. This will also give them a chance to close their + instances of this table. + */ + mysql_lock_abort(thd, ulcktbl, TRUE); + mysql_lock_remove(thd, thd->locked_tables, ulcktbl, TRUE); + ulcktbl->lock_count= 0; + } + if ((ulcktbl != table) && ulcktbl->db_stat) + { + /* + Close the parent too. Note that parent can come later in + the list of tables. It will then be noticed as closed and + as a placeholder. When this happens, do not clear the + placeholder flag. See the branch below ("***"). + */ + ulcktbl->open_placeholder= 1; + close_handle_and_leave_table_as_lock(ulcktbl); + } /* We want to protect the table from concurrent DDL operations (like RENAME TABLE) until we will re-open and re-lock it. 
@@ -3056,7 +3484,7 @@ void close_old_data_files(THD *thd, TABLE *table, bool morph_locks, } close_handle_and_leave_table_as_lock(table); } - else if (table->open_placeholder) + else if (table->open_placeholder && !morph_locks) { /* We come here only in close-for-back-off scenario. So we have to @@ -3064,8 +3492,11 @@ void close_old_data_files(THD *thd, TABLE *table, bool morph_locks, in case of concurrent execution of CREATE TABLE t1 SELECT * FROM t2 and RENAME TABLE t2 TO t1). In close-for-re-open scenario we will probably want to let it stay. + + Note "***": We must not enter this branch if the placeholder + flag has been set because of a former close through a child. + See above the comment that refers to this note. */ - DBUG_ASSERT(!morph_locks); table->open_placeholder= 0; } } @@ -3136,7 +3567,7 @@ bool wait_for_tables(THD *thd) { thd->some_tables_deleted=0; close_old_data_files(thd,thd->open_tables,0,dropping_tables != 0); - mysql_ha_flush(thd, (TABLE_LIST*) NULL, MYSQL_HA_REOPEN_ON_USAGE, TRUE); + mysql_ha_flush(thd); if (!table_is_used(thd->open_tables,1)) break; (void) pthread_cond_wait(&COND_refresh,&LOCK_open); @@ -3186,13 +3617,29 @@ TABLE *drop_locked_tables(THD *thd,const char *db, const char *table_name) prev= &thd->open_tables; DBUG_ENTER("drop_locked_tables"); + /* + Note that we need to hold LOCK_open while changing the + open_tables list. Another thread may work on it. + (See: remove_table_from_cache(), mysql_wait_completed_table()) + Closing a MERGE child before the parent would be fatal if the + other thread tries to abort the MERGE lock in between. + */ for (table= thd->open_tables; table ; table=next) { next=table->next; if (!strcmp(table->s->table_name.str, table_name) && !strcmp(table->s->db.str, db)) { - mysql_lock_remove(thd, thd->locked_tables, table, TRUE); + /* If MERGE child, forward lock handling to parent. */ + mysql_lock_remove(thd, thd->locked_tables, + table->parent ? 
table->parent : table, TRUE); + /* + When closing a MERGE parent or child table, detach the children first. + Clear child table references in case this object is opened again. + */ + if (table->child_l || table->parent) + detach_merge_children(table, TRUE); + if (!found) { found= table; @@ -3241,7 +3688,8 @@ void abort_locked_tables(THD *thd,const char *db, const char *table_name) if (!strcmp(table->s->table_name.str, table_name) && !strcmp(table->s->db.str, db)) { - mysql_lock_abort(thd,table, TRUE); + /* If MERGE child, forward lock handling to parent. */ + mysql_lock_abort(thd, table->parent ? table->parent : table, TRUE); break; } } @@ -3272,7 +3720,8 @@ void abort_locked_tables(THD *thd,const char *db, const char *table_name) share->table_map_id is given a value that with a high certainty is not used by any other table (the only case where a table id can be reused is on wrap-around, which means more than 4 billion table - shares open at the same time). + share opens have been executed while one table was open all the + time). share->table_map_id is not ~0UL. */ @@ -3511,6 +3960,340 @@ err: } +/** + @brief Add list of MERGE children to a TABLE_LIST list. + + @param[in] tlist the parent TABLE_LIST object just opened + + @return status + @retval 0 OK + @retval != 0 Error + + @detail + When a MERGE parent table has just been opened, insert the + TABLE_LIST chain from the MERGE handle into the table list used for + opening tables for this statement. This lets the children be opened + too. +*/ + +static int add_merge_table_list(TABLE_LIST *tlist) +{ + TABLE *parent= tlist->table; + TABLE_LIST *child_l; + DBUG_ENTER("add_merge_table_list"); + DBUG_PRINT("myrg", ("table: '%s'.'%s' 0x%lx", parent->s->db.str, + parent->s->table_name.str, (long) parent)); + + /* Must not call this with attached children. */ + DBUG_ASSERT(!parent->children_attached); + /* Must not call this with children list in place. 
*/ + DBUG_ASSERT(tlist->next_global != parent->child_l); + /* Prevent inclusion of another MERGE table. Could make infinite recursion. */ + if (tlist->parent_l) + { + my_error(ER_ADMIN_WRONG_MRG_TABLE, MYF(0), tlist->alias); + DBUG_RETURN(1); + } + + /* Fix children.*/ + for (child_l= parent->child_l; ; child_l= child_l->next_global) + { + /* + Note: child_l->table may still be set if this parent was taken + from the unused_tables chain. Ignore this fact here. The + reference will be replaced by the handler in + ::extra(HA_EXTRA_ATTACH_CHILDREN). + */ + + /* Set lock type. */ + child_l->lock_type= tlist->lock_type; + + /* Set parent reference. */ + child_l->parent_l= tlist; + + /* Break when this was the last child. */ + if (&child_l->next_global == parent->child_last_l) + break; + } + + /* Insert children into the table list. */ + *parent->child_last_l= tlist->next_global; + tlist->next_global= parent->child_l; + + /* + Do not fix the prev_global pointers. We will remove the + chain soon anyway. + */ + + DBUG_RETURN(0); +} + + +/** + @brief Attach MERGE children to the parent. + + @param[in] tlist the child TABLE_LIST object just opened + + @return status + @retval 0 OK + @retval != 0 Error + + @note + This is called when the last MERGE child has just been opened, let + the handler attach the MyISAM tables to the MERGE table. Remove + MERGE TABLE_LIST chain from the statement list so that it cannot be + changed or freed. +*/ + +static int attach_merge_children(TABLE_LIST *tlist) +{ + TABLE *parent= tlist->parent_l->table; + int error; + DBUG_ENTER("attach_merge_children"); + DBUG_PRINT("myrg", ("table: '%s'.'%s' 0x%lx", parent->s->db.str, + parent->s->table_name.str, (long) parent)); + + /* Must not call this with attached children. */ + DBUG_ASSERT(!parent->children_attached); + /* Must call this with children list in place. */ + DBUG_ASSERT(tlist->parent_l->next_global == parent->child_l); + + /* Attach MyISAM tables to MERGE table. 
*/ + error= parent->file->extra(HA_EXTRA_ATTACH_CHILDREN); + + /* + Remove children from the table list. Even in case of an error. + This should prevent tampering with them. + */ + tlist->parent_l->next_global= *parent->child_last_l; + + /* + Do not fix the last child's next_global pointer. It is needed for + stepping to the next table in the enclosing loop in open_tables(). + Do not fix prev_global pointers. We did not set them. + */ + + if (error) + { + DBUG_PRINT("error", ("attaching MERGE children failed: %d", my_errno)); + parent->file->print_error(error, MYF(0)); + DBUG_RETURN(1); + } + + parent->children_attached= TRUE; + DBUG_PRINT("myrg", ("attached parent: '%s'.'%s' 0x%lx", parent->s->db.str, + parent->s->table_name.str, (long) parent)); + + /* + Note that we have the children in the thd->open_tables list at this + point. + */ + + DBUG_RETURN(0); +} + + +/** + @brief Detach MERGE children from the parent. + + @note + Call this before the first table of a MERGE table (parent or child) + is closed. + + When closing thread tables at end of statement, both parent and + children are in thd->open_tables and will be closed. In most cases + the children will be closed before the parent. They are opened after + the parent and thus stacked into thd->open_tables before it. + + To avoid that we touch a closed child in any way, we must detach + the children from the parent when the first belonging table is + closed (parent or child). + + All references to the children should be removed on handler level + and optionally on table level. + + @note + Assure that you call it for a MERGE parent or child only. + Either table->child_l or table->parent must be set. 
+ + @param[in] table the TABLE object of the parent + @param[in] clear_refs if to clear TABLE references + this must be true when called from + close_thread_tables() to enable fresh + open in open_tables() + it must be false when called in preparation + for reopen_tables() +*/ + +void detach_merge_children(TABLE *table, bool clear_refs) +{ + TABLE_LIST *child_l; + TABLE *parent= table->child_l ? table : table->parent; + bool first_detach; + DBUG_ENTER("detach_merge_children"); + /* + Either table->child_l or table->parent must be set. Parent must have + child_l set. + */ + DBUG_ASSERT(parent && parent->child_l); + DBUG_PRINT("myrg", ("table: '%s'.'%s' 0x%lx clear_refs: %d", + table->s->db.str, table->s->table_name.str, + (long) table, clear_refs)); + DBUG_PRINT("myrg", ("parent: '%s'.'%s' 0x%lx", parent->s->db.str, + parent->s->table_name.str, (long) parent)); + + /* + In a open_tables() loop it can happen that not all tables have their + children attached yet. Also this is called for every child and the + parent from close_thread_tables(). + */ + if ((first_detach= parent->children_attached)) + { + VOID(parent->file->extra(HA_EXTRA_DETACH_CHILDREN)); + parent->children_attached= FALSE; + DBUG_PRINT("myrg", ("detached parent: '%s'.'%s' 0x%lx", parent->s->db.str, + parent->s->table_name.str, (long) parent)); + } + else + DBUG_PRINT("myrg", ("parent is already detached")); + + if (clear_refs) + { + /* In any case clear the own parent reference. (***) */ + table->parent= NULL; + + /* + On the first detach, clear all references. If this table is the + parent, we still may need to clear the child references. The first + detach might not have done this. + */ + if (first_detach || (table == parent)) + { + /* Clear TABLE references to force new assignment at next open. */ + for (child_l= parent->child_l; ; child_l= child_l->next_global) + { + /* + Do not DBUG_ASSERT(child_l->table); open_tables might be + incomplete. 
+ + Clear the parent reference of the children only on the first + detach. The children might already be closed. They will clear + it themselves when this function is called for them with + 'clear_refs' true. See above "(***)". + */ + if (first_detach && child_l->table) + child_l->table->parent= NULL; + + /* Clear the table reference to force new assignment at next open. */ + child_l->table= NULL; + + /* Break when this was the last child. */ + if (&child_l->next_global == parent->child_last_l) + break; + } + } + } + + DBUG_VOID_RETURN; +} + + +/** + @brief Fix MERGE children after open. + + @param[in] old_child_list first list member from original table + @param[in] old_last pointer to &next_global of last list member + @param[in] new_child_list first list member from freshly opened table + @param[in] new_last pointer to &next_global of last list member + + @return mismatch + @retval FALSE OK, no mismatch + @retval TRUE Error, lists mismatch + + @detail + Main action is to copy TABLE reference for each member of original + child list to new child list. After a fresh open these references + are NULL. Assign the old children to the new table. Some of them + might also be reopened or will be reopened soon. + + Other action is to verify that the table definition with respect to + the UNION list did not change. + + @note + This function terminates the child list if the respective '*_last' + pointer is non-NULL. Do not call it from a place where the list is + embedded in another list and this would break it. + + Terminating the list is required for example in the first + reopen_table() after open_tables(). open_tables() requires the end + of the list not to be terminated because other tables could follow + behind the child list. + + If a '*_last' pointer is NULL, the respective list is assumed to be + NULL terminated. 
+*/ + +bool fix_merge_after_open(TABLE_LIST *old_child_list, TABLE_LIST **old_last, + TABLE_LIST *new_child_list, TABLE_LIST **new_last) +{ + bool mismatch= FALSE; + DBUG_ENTER("fix_merge_after_open"); + DBUG_PRINT("myrg", ("old last addr: 0x%lx new last addr: 0x%lx", + (long) old_last, (long) new_last)); + + /* Terminate the lists for easier check of list end. */ + if (old_last) + *old_last= NULL; + if (new_last) + *new_last= NULL; + + for (;;) + { + DBUG_PRINT("myrg", ("old list item: 0x%lx new list item: 0x%lx", + (long) old_child_list, (long) new_child_list)); + /* Break if one of the lists is at its end. */ + if (!old_child_list || !new_child_list) + break; + /* Old table has references to child TABLEs. */ + DBUG_ASSERT(old_child_list->table); + /* New table does not yet have references to child TABLEs. */ + DBUG_ASSERT(!new_child_list->table); + DBUG_PRINT("myrg", ("old table: '%s'.'%s' new table: '%s'.'%s'", + old_child_list->db, old_child_list->table_name, + new_child_list->db, new_child_list->table_name)); + /* Child db.table names must match. */ + if (strcmp(old_child_list->table_name, new_child_list->table_name) || + strcmp(old_child_list->db, new_child_list->db)) + break; + /* + Copy TABLE reference. Child TABLE objects are still in place + though not necessarily open yet. + */ + DBUG_PRINT("myrg", ("old table ref: 0x%lx replaces new table ref: 0x%lx", + (long) old_child_list->table, + (long) new_child_list->table)); + new_child_list->table= old_child_list->table; + /* Step both lists. */ + old_child_list= old_child_list->next_global; + new_child_list= new_child_list->next_global; + } + DBUG_PRINT("myrg", ("end of list, mismatch: %d", mismatch)); + /* + If the list pointers are not both NULL after the loop, then the + lists differ. If they are both identical, but not NULL, then they + have at least one table in common and hence the rest of the list + would be identical too. 
But in this case the loop would run until the + list end, where both pointers would become NULL. + */ + if (old_child_list != new_child_list) + mismatch= TRUE; + if (mismatch) + my_error(ER_TABLE_DEF_CHANGED, MYF(0)); + + DBUG_RETURN(mismatch); +} + + /* Open all tables in list @@ -3541,7 +4324,7 @@ err: int open_tables(THD *thd, TABLE_LIST **start, uint *counter, uint flags) { - TABLE_LIST *tables; + TABLE_LIST *tables= NULL; bool refresh; int result=0; MEM_ROOT new_frm_mem; @@ -3554,7 +4337,7 @@ int open_tables(THD *thd, TABLE_LIST **start, uint *counter, uint flags) temporary mem_root for new .frm parsing. TODO: variables for size */ - init_alloc_root(&new_frm_mem, 8024, 8024); + init_sql_alloc(&new_frm_mem, 8024, 8024); thd->current_tablenr= 0; restart: @@ -3601,6 +4384,9 @@ int open_tables(THD *thd, TABLE_LIST **start, uint *counter, uint flags) */ for (tables= *start; tables ;tables= tables->next_global) { + DBUG_PRINT("tcache", ("opening table: '%s'.'%s' item: 0x%lx", + tables->db, tables->table_name, (long) tables)); + safe_to_ignore_table= FALSE; /* @@ -3652,6 +4438,10 @@ int open_tables(THD *thd, TABLE_LIST **start, uint *counter, uint flags) else tables->table= open_table(thd, tables, &new_frm_mem, &refresh, flags); } + else + DBUG_PRINT("tcache", ("referenced table: '%s'.'%s' 0x%lx", + tables->db, tables->table_name, + (long) tables->table)); if (!tables->table) { @@ -3683,6 +4473,19 @@ int open_tables(THD *thd, TABLE_LIST **start, uint *counter, uint flags) goto process_view_routines; } + /* + If in a MERGE table open, we need to remove the children list + from statement table list before restarting. Otherwise the list + will be inserted another time. + */ + if (tables->parent_l) + { + TABLE_LIST *parent_l= tables->parent_l; + /* The parent table should be correctly open at this point. 
*/ + DBUG_ASSERT(parent_l->table); + parent_l->next_global= *parent_l->table->child_last_l; + } + if (refresh) // Refresh in progress { /* @@ -3751,6 +4554,24 @@ int open_tables(THD *thd, TABLE_LIST **start, uint *counter, uint flags) thd->update_lock_default : tables->lock_type; tables->table->grant= tables->grant; + /* Attach MERGE children if not locked already. */ + DBUG_PRINT("tcache", ("is parent: %d is child: %d", + test(tables->table->child_l), + test(tables->parent_l))); + DBUG_PRINT("tcache", ("in lock tables: %d in prelock mode: %d", + test(thd->locked_tables), test(thd->prelocked_mode))); + if (((!thd->locked_tables && !thd->prelocked_mode) || + tables->table->s->tmp_table) && + ((tables->table->child_l && + add_merge_table_list(tables)) || + (tables->parent_l && + (&tables->next_global == tables->parent_l->table->child_last_l) && + attach_merge_children(tables)))) + { + result= -1; + goto err; + } + process_view_routines: /* Again we may need cache all routines used by this view and add @@ -3783,6 +4604,18 @@ process_view_routines: if (query_tables_last_own) thd->lex->mark_as_requiring_prelocking(query_tables_last_own); + if (result && tables) + { + /* + Some functions determine success as (tables->table != NULL). + tables->table is in thd->open_tables. It won't go lost. If the + error happens on a MERGE child, clear the parents TABLE reference. 
+ */ + if (tables->parent_l) + tables->parent_l->table= NULL; + tables->table= NULL; + } + DBUG_PRINT("tcache", ("returning: %d", result)); DBUG_RETURN(result); } @@ -3822,6 +4655,63 @@ static bool check_lock_and_start_stmt(THD *thd, TABLE *table, } +/** + @brief Open and lock one table + + @param[in] thd thread handle + @param[in] table_l table to open is first table in this list + @param[in] lock_type lock to use for table + + @return table + @retval != NULL OK, opened table returned + @retval NULL Error + + @note + If ok, the following are also set: + table_list->lock_type lock_type + table_list->table table + + @note + If table_l is a list, not a single table, the list is temporarily + broken. + + @detail + This function is meant as a replacement for open_ltable() when + MERGE tables can be opened. open_ltable() cannot open MERGE tables. + + There may be more differences between open_n_lock_single_table() and + open_ltable(). One known difference is that open_ltable() does + neither call decide_logging_format() nor handle some other logging + and locking issues because it does not call lock_tables(). +*/ + +TABLE *open_n_lock_single_table(THD *thd, TABLE_LIST *table_l, + thr_lock_type lock_type) +{ + TABLE_LIST *save_next_global; + DBUG_ENTER("open_n_lock_single_table"); + + /* Remember old 'next' pointer. */ + save_next_global= table_l->next_global; + /* Break list. */ + table_l->next_global= NULL; + + /* Set requested lock type. */ + table_l->lock_type= lock_type; + /* Allow to open real tables only. */ + table_l->required_type= FRMTYPE_TABLE; + + /* Open the table. */ + if (simple_open_n_lock_tables(thd, table_l)) + table_l->table= NULL; /* Just to be sure. */ + + /* Restore list. 
*/ + table_l->next_global= save_next_global; + + DBUG_RETURN(table_l->table); +} + + /* Open and lock one table @@ -3863,6 +4753,17 @@ TABLE *open_ltable(THD *thd, TABLE_LIST *table_list, thr_lock_type lock_type, if (table) { + if (table->child_l) + { + /* A MERGE table must not come here. */ + /* purecov: begin tested */ + my_error(ER_WRONG_OBJECT, MYF(0), table->s->db.str, + table->s->table_name.str, "BASE TABLE"); + table= 0; + goto end; + /* purecov: end */ + } + table_list->lock_type= lock_type; table_list->table= table; table->grant= table_list->grant; @@ -3880,56 +4781,21 @@ TABLE *open_ltable(THD *thd, TABLE_LIST *table_list, thr_lock_type lock_type, table= 0; } } + + end: thd->proc_info=0; DBUG_RETURN(table); } /* - Open all tables in list and locks them for read without derived - tables processing. + Open all tables in list, locks them and optionally process derived tables. SYNOPSIS - simple_open_n_lock_tables() - thd - thread handler - tables - list of tables for open&locking - - RETURN - 0 - ok - -1 - error - - NOTE - The lock will automaticaly be freed by close_thread_tables() -*/ - -int simple_open_n_lock_tables(THD *thd, TABLE_LIST *tables) -{ - uint counter; - bool need_reopen; - DBUG_ENTER("simple_open_n_lock_tables"); - - for ( ; ; ) - { - if (open_tables(thd, &tables, &counter, 0)) - DBUG_RETURN(-1); - if (!lock_tables(thd, tables, counter, &need_reopen)) - break; - if (!need_reopen) - DBUG_RETURN(-1); - close_tables_for_reopen(thd, &tables); - } - DBUG_RETURN(0); -} - - -/* - Open all tables in list, locks them and process derived tables - tables processing. 
- - SYNOPSIS - open_and_lock_tables() + open_and_lock_tables_derived() thd - thread handler tables - list of tables for open&locking + derived - if to handle derived tables RETURN FALSE - ok @@ -3937,27 +4803,43 @@ int simple_open_n_lock_tables(THD *thd, TABLE_LIST *tables) NOTE The lock will automaticaly be freed by close_thread_tables() + + NOTE + There are two convenience functions: + - simple_open_n_lock_tables(thd, tables) without derived handling + - open_and_lock_tables(thd, tables) with derived handling + Both inline functions call open_and_lock_tables_derived() with + the third argument set appropriately. */ -bool open_and_lock_tables(THD *thd, TABLE_LIST *tables) +bool open_and_lock_tables_derived(THD *thd, TABLE_LIST *tables, bool derived) { uint counter; bool need_reopen; - DBUG_ENTER("open_and_lock_tables"); + DBUG_ENTER("open_and_lock_tables_derived"); + DBUG_PRINT("enter", ("derived handling: %d", derived)); for ( ; ; ) { if (open_tables(thd, &tables, &counter, 0)) DBUG_RETURN(-1); + + DBUG_EXECUTE_IF("sleep_open_and_lock_after_open", { + const char *old_proc_info= thd->proc_info; + thd->proc_info= "DBUG sleep"; + my_sleep(6000000); + thd->proc_info= old_proc_info;}); + if (!lock_tables(thd, tables, counter, &need_reopen)) break; if (!need_reopen) DBUG_RETURN(-1); close_tables_for_reopen(thd, &tables); } - if (mysql_handle_derived(thd->lex, &mysql_derived_prepare) || - (thd->fill_derived_tables() && - mysql_handle_derived(thd->lex, &mysql_derived_filling))) + if (derived && + (mysql_handle_derived(thd->lex, &mysql_derived_prepare) || + (thd->fill_derived_tables() && + mysql_handle_derived(thd->lex, &mysql_derived_filling)))) DBUG_RETURN(TRUE); /* purecov: inspected */ DBUG_RETURN(0); } @@ -4271,7 +5153,17 @@ int lock_tables(THD *thd, TABLE_LIST *tables, uint count, bool *need_reopen) thd->lock= 0; thd->in_lock_tables=0; - for (table= tables; table != first_not_own; table= table->next_global) + /* + When open_and_lock_tables() is called for a single 
table out of + a table list, the 'next_global' chain is temporarily broken. We + may not find 'first_not_own' before the end of the "list". + Look for example at those places where open_n_lock_single_table() + is called. That function implements the temporary breaking of + a table list for opening a single table. + */ + for (table= tables; + table && table != first_not_own; + table= table->next_global) { if (!table->placeholder()) { @@ -4298,7 +5190,17 @@ int lock_tables(THD *thd, TABLE_LIST *tables, uint count, bool *need_reopen) else { TABLE_LIST *first_not_own= thd->lex->first_not_own_table(); - for (table= tables; table != first_not_own; table= table->next_global) + /* + When open_and_lock_tables() is called for a single table out of + a table list, the 'next_global' chain is temporarily broken. We + may not find 'first_not_own' before the end of the "list". + Look for example at those places where open_n_lock_single_table() + is called. That function implements the temporary breaking of + a table list for opening a single table. 
+ */ + for (table= tables; + table && table != first_not_own; + table= table->next_global) { if (!table->placeholder() && check_lock_and_start_stmt(thd, table->table, table->lock_type)) @@ -4401,7 +5303,7 @@ TABLE *open_temporary_table(THD *thd, const char *path, const char *db, saved_cache_key= strmov(tmp_path, path)+1; memcpy(saved_cache_key, cache_key, key_length); - init_tmp_table_share(share, saved_cache_key, key_length, + init_tmp_table_share(thd, share, saved_cache_key, key_length, strend(saved_cache_key)+1, tmp_path); if (open_table_def(thd, share, 0) || @@ -4434,6 +5336,8 @@ TABLE *open_temporary_table(THD *thd, const char *path, const char *db, slave_open_temp_tables++; } tmp_table->pos_in_table_list= 0; + DBUG_PRINT("tmptable", ("opened table: '%s'.'%s' 0x%lx", tmp_table->s->db.str, + tmp_table->s->table_name.str, (long) tmp_table)); DBUG_RETURN(tmp_table); } @@ -7115,7 +8019,7 @@ my_bool mysql_rm_tmp_tables(void) /* We should cut file extention before deleting of table */ memcpy(filePathCopy, filePath, filePath_len - ext_len); filePathCopy[filePath_len - ext_len]= 0; - init_tmp_table_share(&share, "", 0, "", filePathCopy); + init_tmp_table_share(thd, &share, "", 0, "", filePathCopy); if (!open_table_def(thd, &share, 0) && ((handler_file= get_new_handler(&share, thd->mem_root, share.db_type())))) @@ -7217,7 +8121,7 @@ bool remove_table_from_cache(THD *thd, const char *db, const char *table_name, TABLE_SHARE *share; bool result= 0, signalled= 0; DBUG_ENTER("remove_table_from_cache"); - DBUG_PRINT("enter", ("Table: '%s.%s' flags: %u", db, table_name, flags)); + DBUG_PRINT("enter", ("table: '%s'.'%s' flags: %u", db, table_name, flags)); key_length=(uint) (strmov(strmov(key,db)+1,table_name)-key)+1; for (;;) @@ -7232,6 +8136,8 @@ bool remove_table_from_cache(THD *thd, const char *db, const char *table_name, &state)) { THD *in_use; + DBUG_PRINT("tcache", ("found table: '%s'.'%s' 0x%lx", table->s->db.str, + table->s->table_name.str, (long) table)); 
table->s->version=0L; /* Free when thread is ready */ if (!(in_use=table->in_use)) @@ -7270,13 +8176,19 @@ bool remove_table_from_cache(THD *thd, const char *db, const char *table_name, } /* Now we must abort all tables locks used by this thread - as the thread may be waiting to get a lock for another table + as the thread may be waiting to get a lock for another table. + Note that we need to hold LOCK_open while going through the + list. So that the other thread cannot change it. The other + thread must also hold LOCK_open whenever changing the + open_tables list. Aborting the MERGE lock after a child was + closed and before the parent is closed would be fatal. */ for (TABLE *thd_table= in_use->open_tables; thd_table ; thd_table= thd_table->next) { - if (thd_table->db_stat) // If table is open + /* Do not handle locks of MERGE children. */ + if (thd_table->db_stat && !thd_table->parent) // If table is open signalled|= mysql_lock_abort_for_thread(thd, thd_table); } } @@ -7479,7 +8391,9 @@ int abort_and_upgrade_lock(ALTER_PARTITION_PARAM_TYPE *lpt) lpt->old_lock_type= lpt->table->reginfo.lock_type; VOID(pthread_mutex_lock(&LOCK_open)); - mysql_lock_abort(lpt->thd, lpt->table, TRUE); + /* If MERGE child, forward lock handling to parent. */ + mysql_lock_abort(lpt->thd, lpt->table->parent ? 
lpt->table->parent : + lpt->table, TRUE); VOID(remove_table_from_cache(lpt->thd, lpt->db, lpt->table_name, flags)); VOID(pthread_mutex_unlock(&LOCK_open)); DBUG_RETURN(0); @@ -7500,14 +8414,18 @@ int abort_and_upgrade_lock(ALTER_PARTITION_PARAM_TYPE *lpt) We also downgrade locks after the upgrade to WRITE_ONLY */ +/* purecov: begin deadcode */ void close_open_tables_and_downgrade(ALTER_PARTITION_PARAM_TYPE *lpt) { VOID(pthread_mutex_lock(&LOCK_open)); remove_table_from_cache(lpt->thd, lpt->db, lpt->table_name, RTFC_WAIT_OTHER_THREAD_FLAG); VOID(pthread_mutex_unlock(&LOCK_open)); - mysql_lock_downgrade_write(lpt->thd, lpt->table, lpt->old_lock_type); + /* If MERGE child, forward lock handling to parent. */ + mysql_lock_downgrade_write(lpt->thd, lpt->table->parent ? lpt->table->parent : + lpt->table, lpt->old_lock_type); } +/* purecov: end */ /* @@ -7573,13 +8491,19 @@ void mysql_wait_completed_table(ALTER_PARTITION_PARAM_TYPE *lpt, TABLE *my_table } /* Now we must abort all tables locks used by this thread - as the thread may be waiting to get a lock for another table + as the thread may be waiting to get a lock for another table. + Note that we need to hold LOCK_open while going through the + list. So that the other thread cannot change it. The other + thread must also hold LOCK_open whenever changing the + open_tables list. Aborting the MERGE lock after a child was + closed and before the parent is closed would be fatal. */ for (TABLE *thd_table= in_use->open_tables; thd_table ; thd_table= thd_table->next) { - if (thd_table->db_stat) // If table is open + /* Do not handle locks of MERGE children. */ + if (thd_table->db_stat && !thd_table->parent) // If table is open mysql_lock_abort_for_thread(lpt->thd, thd_table); } } @@ -7589,8 +8513,10 @@ void mysql_wait_completed_table(ALTER_PARTITION_PARAM_TYPE *lpt, TABLE *my_table those in use for removal after completion. Now we also need to abort all that are locked and are not progressing due to being locked by our lock. 
We don't upgrade our lock here. + If MERGE child, forward lock handling to parent. */ - mysql_lock_abort(lpt->thd, my_table, FALSE); + mysql_lock_abort(lpt->thd, my_table->parent ? my_table->parent : my_table, + FALSE); VOID(pthread_mutex_unlock(&LOCK_open)); DBUG_VOID_RETURN; } @@ -7849,6 +8775,13 @@ void close_performance_schema_table(THD *thd, Open_tables_state *backup) pthread_mutex_lock(&LOCK_open); found_old_table= false; + /* + Note that we need to hold LOCK_open while changing the + open_tables list. Another thread may work on it. + (See: remove_table_from_cache(), mysql_wait_completed_table()) + Closing a MERGE child before the parent would be fatal if the + other thread tries to abort the MERGE lock in between. + */ while (thd->open_tables) found_old_table|= close_thread_table(thd, &thd->open_tables); diff --git a/sql/sql_class.cc b/sql/sql_class.cc index 1ee490fbcc8..6ebd0e5f7dc 100644 --- a/sql/sql_class.cc +++ b/sql/sql_class.cc @@ -678,9 +678,7 @@ void THD::cleanup(void) lock=locked_tables; locked_tables=0; close_thread_tables(this); } - mysql_ha_flush(this, (TABLE_LIST*) 0, - MYSQL_HA_CLOSE_FINAL | MYSQL_HA_FLUSH_ALL, FALSE); - hash_free(&handler_tables_hash); + mysql_ha_cleanup(this); delete_dynamic(&user_var_events); hash_free(&user_vars); close_temporary_tables(this); @@ -818,7 +816,20 @@ void THD::awake(THD::killed_state state_to_set) if (!slave_thread) thread_scheduler.post_kill_notification(this); #ifdef SIGNAL_WITH_VIO_CLOSE - close_active_vio(); + if (this != current_thd) + { + /* + In addition to a signal, let's close the socket of the thread that + is being killed. This is to make sure it does not block if the + signal is lost. This needs to be done only on platforms where + signals are not a reliable interruption mechanism. + + If we're killing ourselves, we know that we're not blocked, so this + hack is not used. 
+ */ + + close_active_vio(); + } #endif } if (mysys_var) diff --git a/sql/sql_class.h b/sql/sql_class.h index 63a662f07b2..ce035b34679 100644 --- a/sql/sql_class.h +++ b/sql/sql_class.h @@ -1172,8 +1172,6 @@ public: THD_TRANS all; // Trans since BEGIN WORK THD_TRANS stmt; // Trans for current statement bool on; // see ha_enable_transaction() - XID xid; // transaction identifier - enum xa_states xa_state; // used by external XA only XID_STATE xid_state; Rows_log_event *m_pending_rows_event; @@ -1264,14 +1262,16 @@ public: We follow this logic: - when stmt starts, first_successful_insert_id_in_prev_stmt contains the first insert id successfully inserted by the previous stmt. - - as stmt makes progress, handler::insert_id_for_cur_row changes; every - time get_auto_increment() is called, auto_inc_intervals_for_binlog is - augmented with the reserved interval (if statement-based binlogging). + - as stmt makes progress, handler::insert_id_for_cur_row changes; + every time get_auto_increment() is called, + auto_inc_intervals_in_cur_stmt_for_binlog is augmented with the + reserved interval (if statement-based binlogging). - at first successful insertion of an autogenerated value, first_successful_insert_id_in_cur_stmt is set to handler::insert_id_for_cur_row. - - when stmt goes to binlog, auto_inc_intervals_for_binlog is - binlogged if non-empty. + - when stmt goes to binlog, + auto_inc_intervals_in_cur_stmt_for_binlog is binlogged if + non-empty. - when stmt ends, first_successful_insert_id_in_prev_stmt is set to first_successful_insert_id_in_cur_stmt. */ @@ -2491,6 +2491,11 @@ class multi_delete :public select_result_interceptor /* True if at least one table we delete from is not transactional */ bool normal_tables; bool delete_while_scanning; + /* + error handling (rollback and binlogging) can happen in send_eof() + so that afterward send_error() needs to find out that. 
+ */ + bool error_handled; public: multi_delete(TABLE_LIST *dt, uint num_of_tables); @@ -2526,6 +2531,11 @@ class multi_update :public select_result_interceptor /* True if the update operation has made a change in a transactional table */ bool transactional_tables; bool ignore; + /* + error handling (rollback and binlogging) can happen in send_eof() + so that afterward send_error() needs to find out that. + */ + bool error_handled; public: multi_update(TABLE_LIST *ut, TABLE_LIST *leaves_list, diff --git a/sql/sql_db.cc b/sql/sql_db.cc index 88902e65a42..f669a242508 100644 --- a/sql/sql_db.cc +++ b/sql/sql_db.cc @@ -883,6 +883,13 @@ bool mysql_rm_db(THD *thd,char *db,bool if_exists, bool silent) VOID(pthread_mutex_lock(&LOCK_mysql_create_db)); + /* + This statement will be replicated as a statement, even when using + row-based replication. The flag will be reset at the end of the + statement. + */ + thd->clear_current_stmt_binlog_row_based(); + length= build_table_filename(path, sizeof(path), db, "", "", 0); strmov(path+length, MY_DB_OPT_FILE); // Append db option file name del_dbopt(path); // Remove dboption hash entry @@ -1388,7 +1395,7 @@ static void backup_current_db_name(THD *thd, } else { - strmake(saved_db_name->str, thd->db, saved_db_name->length); + strmake(saved_db_name->str, thd->db, saved_db_name->length - 1); saved_db_name->length= thd->db_length; } } diff --git a/sql/sql_delete.cc b/sql/sql_delete.cc index f183cb3142f..509e736f6e7 100644 --- a/sql/sql_delete.cc +++ b/sql/sql_delete.cc @@ -39,6 +39,7 @@ bool mysql_delete(THD *thd, TABLE_LIST *table_list, COND *conds, ha_rows deleted= 0; uint usable_index= MAX_KEY; SELECT_LEX *select_lex= &thd->lex->select_lex; + THD::killed_state killed_status= THD::NOT_KILLED; DBUG_ENTER("mysql_delete"); if (open_and_lock_tables(thd, table_list)) @@ -307,7 +308,8 @@ bool mysql_delete(THD *thd, TABLE_LIST *table_list, COND *conds, else table->file->unlock_row(); // Row failed selection, release lock on it } - if 
(thd->killed || thd->is_error()) + killed_status= thd->killed; + if (killed_status != THD::NOT_KILLED || thd->is_error()) error= 1; // Aborted if (will_batch && (loc_error= table->file->end_bulk_delete())) { @@ -352,13 +354,12 @@ cleanup: thd->transaction.stmt.modified_non_trans_table= TRUE; /* See similar binlogging code in sql_update.cc, for comments */ - if ((error < 0) || (deleted && !transactional_table)) + if ((error < 0) || thd->transaction.stmt.modified_non_trans_table) { if (mysql_bin_log.is_open()) { if (error < 0) thd->clear_error(); - /* [binlog]: If 'handler::delete_all_rows()' was called and the storage engine does not inject the rows itself, we replicate @@ -367,7 +368,7 @@ cleanup: */ int log_result= thd->binlog_query(THD::ROW_QUERY_TYPE, thd->query, thd->query_length, - transactional_table, FALSE); + transactional_table, FALSE, killed_status); if (log_result && transactional_table) { @@ -548,7 +549,7 @@ bool mysql_multi_delete_prepare(THD *thd) multi_delete::multi_delete(TABLE_LIST *dt, uint num_of_tables_arg) : delete_tables(dt), deleted(0), found(0), num_of_tables(num_of_tables_arg), error(0), - do_delete(0), transactional_tables(0), normal_tables(0) + do_delete(0), transactional_tables(0), normal_tables(0), error_handled(0) { tempfiles= (Unique **) sql_calloc(sizeof(Unique *) * num_of_tables); } @@ -727,12 +728,14 @@ void multi_delete::send_error(uint errcode,const char *err) /* First send error what ever it is ... 
*/ my_message(errcode, err, MYF(0)); - /* If nothing deleted return */ - if (!deleted) + /* the error was handled or nothing deleted and no side effects return */ + if (error_handled || + !thd->transaction.stmt.modified_non_trans_table && !deleted) DBUG_VOID_RETURN; /* Something already deleted so we have to invalidate cache */ - query_cache_invalidate3(thd, delete_tables, 1); + if (deleted) + query_cache_invalidate3(thd, delete_tables, 1); /* If rows from the first table only has been deleted and it is @@ -752,12 +755,30 @@ void multi_delete::send_error(uint errcode,const char *err) */ error= 1; send_eof(); + DBUG_ASSERT(error_handled); + DBUG_VOID_RETURN; } - DBUG_ASSERT(!normal_tables || !deleted || thd->transaction.stmt.modified_non_trans_table); + + if (thd->transaction.stmt.modified_non_trans_table) + { + /* + there is only side effects; to binlog with the error + */ + if (mysql_bin_log.is_open()) + { + thd->binlog_query(THD::ROW_QUERY_TYPE, + thd->query, thd->query_length, + transactional_tables, FALSE); + } + thd->transaction.all.modified_non_trans_table= true; + } + DBUG_ASSERT(!normal_tables || !deleted || + thd->transaction.stmt.modified_non_trans_table); DBUG_VOID_RETURN; } + /* Do delete from other tables. Returns values: @@ -850,6 +871,7 @@ int multi_delete::do_deletes() bool multi_delete::send_eof() { + THD::killed_state killed_status= THD::NOT_KILLED; thd->proc_info="deleting from reference tables"; /* Does deletes for the last n - 1 tables, returns 0 if ok */ @@ -857,7 +879,7 @@ bool multi_delete::send_eof() /* compute a total error to know if something failed */ local_error= local_error || error; - + killed_status= (local_error == 0)? 
THD::NOT_KILLED : thd->killed; /* reset used flags */ thd->proc_info="end"; @@ -869,7 +891,9 @@ bool multi_delete::send_eof() { query_cache_invalidate3(thd, delete_tables, 1); } - if ((local_error == 0) || (deleted && normal_tables)) + DBUG_ASSERT(!normal_tables || !deleted || + thd->transaction.stmt.modified_non_trans_table); + if ((local_error == 0) || thd->transaction.stmt.modified_non_trans_table) { if (mysql_bin_log.is_open()) { @@ -877,7 +901,7 @@ bool multi_delete::send_eof() thd->clear_error(); if (thd->binlog_query(THD::ROW_QUERY_TYPE, thd->query, thd->query_length, - transactional_tables, FALSE) && + transactional_tables, FALSE, killed_status) && !normal_tables) { local_error=1; // Log write failed: roll back the SQL statement @@ -886,7 +910,8 @@ bool multi_delete::send_eof() if (thd->transaction.stmt.modified_non_trans_table) thd->transaction.all.modified_non_trans_table= TRUE; } - DBUG_ASSERT(!normal_tables || !deleted || thd->transaction.stmt.modified_non_trans_table); + if (local_error != 0) + error_handled= TRUE; // to force early leave from ::send_error() /* Commit or rollback the current SQL statement */ if (transactional_tables) diff --git a/sql/sql_handler.cc b/sql/sql_handler.cc index 19a99f9d12b..a4ba6f1140b 100644 --- a/sql/sql_handler.cc +++ b/sql/sql_handler.cc @@ -65,9 +65,6 @@ static enum enum_ha_read_modes rkey_to_rnext[]= { RNEXT_SAME, RNEXT, RPREV, RNEXT, RPREV, RNEXT, RPREV, RPREV }; -static int mysql_ha_flush_table(THD *thd, TABLE **table_ptr, uint mode_flags); - - /* Get hash key and hash key length. @@ -119,13 +116,15 @@ static void mysql_ha_hash_free(TABLE_LIST *tables) @param thd Thread identifier. @param tables A list of tables with the first entry to close. + @param is_locked If LOCK_open is locked. @note Though this function takes a list of tables, only the first list entry will be closed. - @note Broadcasts refresh if it closed the table. + @note Broadcasts refresh if it closed a table with old version. 
*/ -static void mysql_ha_close_table(THD *thd, TABLE_LIST *tables) +static void mysql_ha_close_table(THD *thd, TABLE_LIST *tables, + bool is_locked) { TABLE **table_ptr; @@ -143,13 +142,15 @@ static void mysql_ha_close_table(THD *thd, TABLE_LIST *tables) if (*table_ptr) { (*table_ptr)->file->ha_index_or_rnd_end(); - VOID(pthread_mutex_lock(&LOCK_open)); + if (! is_locked) + VOID(pthread_mutex_lock(&LOCK_open)); if (close_thread_table(thd, table_ptr)) { /* Tell threads waiting for refresh that something has happened */ broadcast_refresh(); } - VOID(pthread_mutex_unlock(&LOCK_open)); + if (! is_locked) + VOID(pthread_mutex_unlock(&LOCK_open)); } else if (tables->table) { @@ -305,7 +306,7 @@ err: if (hash_tables) my_free((char*) hash_tables, MYF(0)); if (tables->table) - mysql_ha_close_table(thd, tables); + mysql_ha_close_table(thd, tables, FALSE); DBUG_PRINT("exit",("ERROR")); DBUG_RETURN(TRUE); } @@ -339,7 +340,7 @@ bool mysql_ha_close(THD *thd, TABLE_LIST *tables) (uchar*) tables->alias, strlen(tables->alias) + 1))) { - mysql_ha_close_table(thd, hash_tables); + mysql_ha_close_table(thd, hash_tables, FALSE); hash_delete(&thd->handler_tables_hash, (uchar*) hash_tables); } else @@ -478,7 +479,7 @@ retry: if (need_reopen) { - mysql_ha_close_table(thd, tables); + mysql_ha_close_table(thd, tables, FALSE); hash_tables->table= NULL; /* The lock might have been aborted, we need to manually reset @@ -669,163 +670,130 @@ err0: } -/* - Flush (close) a list of HANDLER tables. - - SYNOPSIS - mysql_ha_flush() - thd Thread identifier. - tables The list of tables to close. If NULL, - close all HANDLER tables [marked as flushed]. - mode_flags MYSQL_HA_CLOSE_FINAL finally close the table. - MYSQL_HA_REOPEN_ON_USAGE mark for reopen. - MYSQL_HA_FLUSH_ALL flush all tables, not only - those marked for flush. - is_locked If LOCK_open is locked. - - DESCRIPTION - The list of HANDLER tables may be NULL, in which case all HANDLER - tables are closed (if MYSQL_HA_FLUSH_ALL) is set. 
- If 'tables' is NULL and MYSQL_HA_FLUSH_ALL is not set, - all HANDLER tables marked for flush are closed. - Broadcasts refresh for every table closed. +/** + Scan the handler tables hash for matching tables. - NOTE - Since mysql_ha_flush() is called when the base table has to be closed, - we compare real table names, not aliases. Hence, database names matter. + @param thd Thread identifier. + @param tables The list of tables to remove. - RETURN - 0 ok + @return Pointer to head of linked list (TABLE_LIST::next_local) of matching + TABLE_LIST elements from handler_tables_hash. Otherwise, NULL if no + table was matched. */ -int mysql_ha_flush(THD *thd, TABLE_LIST *tables, uint mode_flags, - bool is_locked) +static TABLE_LIST *mysql_ha_find(THD *thd, TABLE_LIST *tables) { - TABLE_LIST *tmp_tables; - TABLE **table_ptr; - bool did_lock= FALSE; - DBUG_ENTER("mysql_ha_flush"); - DBUG_PRINT("enter", ("tables: 0x%lx mode_flags: 0x%02x", - (long) tables, mode_flags)); + TABLE_LIST *hash_tables, *head= NULL, *first= tables; + DBUG_ENTER("mysql_ha_find"); - if (tables) + /* search for all handlers with matching table names */ + for (uint i= 0; i < thd->handler_tables_hash.records; i++) { - /* Close all tables in the list. */ - for (tmp_tables= tables ; tmp_tables; tmp_tables= tmp_tables->next_local) + hash_tables= (TABLE_LIST*) hash_element(&thd->handler_tables_hash, i); + for (tables= first; tables; tables= tables->next_local) { - DBUG_PRINT("info-in-tables-list",("'%s'.'%s' as '%s'", - tmp_tables->db, tmp_tables->table_name, - tmp_tables->alias)); - /* Close all currently open handler tables with the same base table. */ - table_ptr= &(thd->handler_tables); - while (*table_ptr) - { - if ((!*tmp_tables->db || - !my_strcasecmp(&my_charset_latin1, (*table_ptr)->s->db.str, - tmp_tables->db)) && - ! 
my_strcasecmp(&my_charset_latin1, - (*table_ptr)->s->table_name.str, - tmp_tables->table_name)) - { - DBUG_PRINT("info",("*table_ptr '%s'.'%s' as '%s'", - (*table_ptr)->s->db.str, - (*table_ptr)->s->table_name.str, - (*table_ptr)->alias)); - /* The first time it is required, lock for close_thread_table(). */ - if (! did_lock && ! is_locked) - { - VOID(pthread_mutex_lock(&LOCK_open)); - did_lock= TRUE; - } - mysql_ha_flush_table(thd, table_ptr, mode_flags); - continue; - } - table_ptr= &(*table_ptr)->next; - } - /* end of handler_tables list */ + if ((! *tables->db || + ! my_strcasecmp(&my_charset_latin1, hash_tables->db, tables->db)) && + ! my_strcasecmp(&my_charset_latin1, hash_tables->table_name, + tables->table_name)) + break; } - /* end of flush tables list */ - } - else - { - /* Close all currently open tables [which are marked for flush]. */ - table_ptr= &(thd->handler_tables); - while (*table_ptr) + if (tables) { - if ((mode_flags & MYSQL_HA_FLUSH_ALL) || - (*table_ptr)->needs_reopen_or_name_lock()) - { - /* The first time it is required, lock for close_thread_table(). */ - if (! did_lock && ! is_locked) - { - VOID(pthread_mutex_lock(&LOCK_open)); - did_lock= TRUE; - } - mysql_ha_flush_table(thd, table_ptr, mode_flags); - continue; - } - table_ptr= &(*table_ptr)->next; + hash_tables->next_local= head; + head= hash_tables; } } - /* Release the lock if it was taken by this function. */ - if (did_lock) - VOID(pthread_mutex_unlock(&LOCK_open)); + DBUG_RETURN(head); +} + + +/** + Remove matching tables from the HANDLER's hash table. + + @param thd Thread identifier. + @param tables The list of tables to remove. + @param is_locked If LOCK_open is locked. + + @note Broadcasts refresh if it closed a table with old version. 
+*/ + +void mysql_ha_rm_tables(THD *thd, TABLE_LIST *tables, bool is_locked) +{ + TABLE_LIST *hash_tables, *next; + DBUG_ENTER("mysql_ha_rm_tables"); + + DBUG_ASSERT(tables); + + hash_tables= mysql_ha_find(thd, tables); - DBUG_RETURN(0); + while (hash_tables) + { + next= hash_tables->next_local; + if (hash_tables->table) + mysql_ha_close_table(thd, hash_tables, is_locked); + hash_delete(&thd->handler_tables_hash, (uchar*) hash_tables); + hash_tables= next; + } + + DBUG_VOID_RETURN; } -/* - Flush (close) a table. - SYNOPSIS - mysql_ha_flush_table() - thd Thread identifier. - table The table to close. - mode_flags MYSQL_HA_CLOSE_FINAL finally close the table. - MYSQL_HA_REOPEN_ON_USAGE mark for reopen. +/** + Flush (close and mark for re-open) all tables that should be should + be reopen. - DESCRIPTION - Broadcasts refresh if it closed the table. - The caller must lock LOCK_open. + @param thd Thread identifier. - RETURN - 0 ok + @note Broadcasts refresh if it closed a table with old version. */ -static int mysql_ha_flush_table(THD *thd, TABLE **table_ptr, uint mode_flags) +void mysql_ha_flush(THD *thd) { - TABLE_LIST *hash_tables; - TABLE *table= *table_ptr; - DBUG_ENTER("mysql_ha_flush_table"); - DBUG_PRINT("enter",("'%s'.'%s' as '%s' flags: 0x%02x", - table->s->db.str, table->s->table_name.str, - table->alias, mode_flags)); + TABLE_LIST *hash_tables; + DBUG_ENTER("mysql_ha_flush"); - if ((hash_tables= (TABLE_LIST*) hash_search(&thd->handler_tables_hash, - (uchar*) table->alias, - strlen(table->alias) + 1))) + safe_mutex_assert_owner(&LOCK_open); + + for (uint i= 0; i < thd->handler_tables_hash.records; i++) { - if (! (mode_flags & MYSQL_HA_REOPEN_ON_USAGE)) - { - /* This is a final close. Remove from hash. 
*/ - hash_delete(&thd->handler_tables_hash, (uchar*) hash_tables); - } - else + hash_tables= (TABLE_LIST*) hash_element(&thd->handler_tables_hash, i); + if (hash_tables->table && hash_tables->table->needs_reopen_or_name_lock()) { + mysql_ha_close_table(thd, hash_tables, TRUE); /* Mark table as closed, ready for re-open. */ hash_tables->table= NULL; } - } + } - safe_mutex_assert_owner(&LOCK_open); - (*table_ptr)->file->ha_index_or_rnd_end(); - safe_mutex_assert_owner(&LOCK_open); - if (close_thread_table(thd, table_ptr)) + DBUG_VOID_RETURN; +} + + +/** + Close all HANDLER's tables. + + @param thd Thread identifier. + + @note Broadcasts refresh if it closed a table with old version. +*/ + +void mysql_ha_cleanup(THD *thd) +{ + TABLE_LIST *hash_tables; + DBUG_ENTER("mysql_ha_cleanup"); + + for (uint i= 0; i < thd->handler_tables_hash.records; i++) { - /* Tell threads waiting for refresh that something has happened */ - broadcast_refresh(); - } + hash_tables= (TABLE_LIST*) hash_element(&thd->handler_tables_hash, i); + if (hash_tables->table) + mysql_ha_close_table(thd, hash_tables, FALSE); + } - DBUG_RETURN(0); + hash_free(&thd->handler_tables_hash); + + DBUG_VOID_RETURN; } + diff --git a/sql/sql_help.cc b/sql/sql_help.cc index c1962c8c650..0d633ce86ac 100644 --- a/sql/sql_help.cc +++ b/sql/sql_help.cc @@ -528,7 +528,7 @@ int send_variant_2_list(MEM_ROOT *mem_root, Protocol *protocol, List_iterator<String> it(*names); for (pos= pointers; pos!=end; (*pos++= it++)); - qsort(pointers,names->elements,sizeof(String*),string_ptr_cmp); + my_qsort(pointers,names->elements,sizeof(String*),string_ptr_cmp); for (pos= pointers; pos!=end; pos++) { diff --git a/sql/sql_insert.cc b/sql/sql_insert.cc index 83f7d9c2e78..805a8aeb445 100644 --- a/sql/sql_insert.cc +++ b/sql/sql_insert.cc @@ -574,7 +574,7 @@ bool mysql_insert(THD *thd,TABLE_LIST *table_list, bool log_on= ((thd->options & OPTION_BIN_LOG) || (!(thd->security_ctx->master_access & SUPER_ACL))); #endif - thr_lock_type lock_type 
= table_list->lock_type; + thr_lock_type lock_type; Item *unused_conds= 0; DBUG_ENTER("mysql_insert"); @@ -609,6 +609,7 @@ bool mysql_insert(THD *thd,TABLE_LIST *table_list, if (open_and_lock_tables(thd, table_list)) DBUG_RETURN(TRUE); } + lock_type= table_list->lock_type; thd->proc_info="init"; thd->used_tables=0; @@ -626,7 +627,6 @@ bool mysql_insert(THD *thd,TABLE_LIST *table_list, /* mysql_prepare_insert set table_list->table if it was not set */ table= table_list->table; - lock_type= table_list->lock_type; context= &thd->lex->select_lex.context; /* @@ -837,59 +837,58 @@ bool mysql_insert(THD *thd,TABLE_LIST *table_list, } transactional_table= table->file->has_transactions(); - if ((changed= (info.copied || info.deleted || info.updated)) || - was_insert_delayed) + if ((changed= (info.copied || info.deleted || info.updated))) { /* Invalidate the table in the query cache if something changed. For the transactional algorithm to work the invalidation must be before binlog writing and ha_autocommit_or_rollback */ - if (changed) - query_cache_invalidate3(thd, table_list, 1); - if (error <= 0 || !transactional_table) + query_cache_invalidate3(thd, table_list, 1); + } + if (changed && error <= 0 || thd->transaction.stmt.modified_non_trans_table + || was_insert_delayed) + { + if (mysql_bin_log.is_open()) { - if (mysql_bin_log.is_open()) + if (error <= 0) { - if (error <= 0) - { - /* - [Guilhem wrote] Temporary errors may have filled - thd->net.last_error/errno. For example if there has - been a disk full error when writing the row, and it was - MyISAM, then thd->net.last_error/errno will be set to - "disk full"... 
and the my_pwrite() will wait until free - space appears, and so when it finishes then the - write_row() was entirely successful - */ - /* todo: consider removing */ - thd->clear_error(); - } - /* bug#22725: - - A query which per-row-loop can not be interrupted with - KILLED, like INSERT, and that does not invoke stored - routines can be binlogged with neglecting the KILLED error. - - If there was no error (error == zero) until after the end of - inserting loop the KILLED flag that appeared later can be - disregarded since previously possible invocation of stored - routines did not result in any error due to the KILLED. In - such case the flag is ignored for constructing binlog event. - */ - DBUG_ASSERT(thd->killed != THD::KILL_BAD_DATA || error > 0); - if (thd->binlog_query(THD::ROW_QUERY_TYPE, - thd->query, thd->query_length, - transactional_table, FALSE, - (error>0) ? thd->killed : THD::NOT_KILLED) && - transactional_table) - { - error=1; - } - } - if (thd->transaction.stmt.modified_non_trans_table) - thd->transaction.all.modified_non_trans_table= TRUE; + /* + [Guilhem wrote] Temporary errors may have filled + thd->net.last_error/errno. For example if there has + been a disk full error when writing the row, and it was + MyISAM, then thd->net.last_error/errno will be set to + "disk full"... and the my_pwrite() will wait until free + space appears, and so when it finishes then the + write_row() was entirely successful + */ + /* todo: consider removing */ + thd->clear_error(); + } + /* bug#22725: + + A query which per-row-loop can not be interrupted with + KILLED, like INSERT, and that does not invoke stored + routines can be binlogged with neglecting the KILLED error. + + If there was no error (error == zero) until after the end of + inserting loop the KILLED flag that appeared later can be + disregarded since previously possible invocation of stored + routines did not result in any error due to the KILLED. 
In + such case the flag is ignored for constructing binlog event. + */ + DBUG_ASSERT(thd->killed != THD::KILL_BAD_DATA || error > 0); + if (thd->binlog_query(THD::ROW_QUERY_TYPE, + thd->query, thd->query_length, + transactional_table, FALSE, + (error>0) ? thd->killed : THD::NOT_KILLED) && + transactional_table) + { + error=1; + } } + if (thd->transaction.stmt.modified_non_trans_table) + thd->transaction.all.modified_non_trans_table= TRUE; } DBUG_ASSERT(transactional_table || !changed || thd->transaction.stmt.modified_non_trans_table); @@ -2273,7 +2272,17 @@ pthread_handler_t handle_delayed_insert(void *arg) parsed using a lex, that depends on initialized thd->lex. */ lex_start(thd); - if (!(di->table=open_ltable(thd, &di->table_list, TL_WRITE_DELAYED, 0))) + thd->lex->sql_command= SQLCOM_INSERT; // For innodb::store_lock() + /* + Statement-based replication of INSERT DELAYED has problems with RAND() + and user vars, so in mixed mode we go to row-based. + */ + thd->lex->set_stmt_unsafe(); + thd->set_current_stmt_binlog_row_based_if_mixed(); + + /* Open table */ + if (!(di->table= open_n_lock_single_table(thd, &di->table_list, + TL_WRITE_DELAYED))) { thd->fatal_error(); // Abort waiting inserts goto err; @@ -3090,6 +3099,7 @@ bool select_insert::send_eof() bool const trans_table= table->file->has_transactions(); ulonglong id; bool changed; + THD::killed_state killed_status= thd->killed; DBUG_ENTER("select_insert::send_eof"); DBUG_PRINT("enter", ("trans_table=%d, table_type='%s'", trans_table, table->file->table_type())); @@ -3123,7 +3133,7 @@ bool select_insert::send_eof() thd->clear_error(); thd->binlog_query(THD::ROW_QUERY_TYPE, thd->query, thd->query_length, - trans_table, FALSE); + trans_table, FALSE, killed_status); } /* We will call ha_autocommit_or_rollback() also for @@ -3175,6 +3185,7 @@ void select_insert::abort() { */ if (table) { + bool changed, transactional_table; /* If we are not in prelocked mode, we end the bulk insert started before. 
@@ -3196,20 +3207,20 @@ void select_insert::abort() { If table creation failed, the number of rows modified will also be zero, so no check for that is made. */ - if (info.copied || info.deleted || info.updated) + changed= (info.copied || info.deleted || info.updated); + transactional_table= table->file->has_transactions(); + if (thd->transaction.stmt.modified_non_trans_table) { - DBUG_ASSERT(table != NULL); - if (!table->file->has_transactions()) - { if (mysql_bin_log.is_open()) thd->binlog_query(THD::ROW_QUERY_TYPE, thd->query, thd->query_length, - table->file->has_transactions(), FALSE); - if (!thd->current_stmt_binlog_row_based && !table->s->tmp_table && - !can_rollback_data()) + transactional_table, FALSE); + if (!thd->current_stmt_binlog_row_based && !can_rollback_data()) thd->transaction.all.modified_non_trans_table= TRUE; - query_cache_invalidate3(thd, table, 1); - } + if (changed) + query_cache_invalidate3(thd, table, 1); } + DBUG_ASSERT(transactional_table || !changed || + thd->transaction.stmt.modified_non_trans_table); table->file->ha_release_auto_increment(); } @@ -3304,7 +3315,7 @@ static TABLE *create_table_from_items(THD *thd, HA_CREATE_INFO *create_info, tmp_table.alias= 0; tmp_table.timestamp_field= 0; tmp_table.s= &share; - init_tmp_table_share(&share, "", 0, "", ""); + init_tmp_table_share(thd, &share, "", 0, "", ""); tmp_table.s->db_create_options=0; tmp_table.s->blob_ptr_size= portable_sizeof_char_ptr; diff --git a/sql/sql_lex.h b/sql/sql_lex.h index da0ff94ec96..cdae7449cc7 100644 --- a/sql/sql_lex.h +++ b/sql/sql_lex.h @@ -1834,11 +1834,11 @@ typedef struct st_lex : public Query_tables_list struct st_lex_local: public st_lex { - static void *operator new(size_t size) + static void *operator new(size_t size) throw() { return sql_alloc(size); } - static void *operator new(size_t size, MEM_ROOT *mem_root) + static void *operator new(size_t size, MEM_ROOT *mem_root) throw() { return (void*) alloc_root(mem_root, (uint) size); } diff --git 
a/sql/sql_load.cc b/sql/sql_load.cc index 8bbe1e413b3..c96fbb80b0c 100644 --- a/sql/sql_load.cc +++ b/sql/sql_load.cc @@ -85,7 +85,8 @@ static int read_sep_field(THD *thd, COPY_INFO &info, TABLE_LIST *table_list, #ifndef EMBEDDED_LIBRARY static bool write_execute_load_query_log_event(THD *thd, bool duplicates, bool ignore, - bool transactional_table); + bool transactional_table, + THD::killed_state killed_status); #endif /* EMBEDDED_LIBRARY */ /* @@ -134,6 +135,7 @@ bool mysql_load(THD *thd,sql_exchange *ex,TABLE_LIST *table_list, char *tdb= thd->db ? thd->db : db; // Result is never null ulong skip_lines= ex->skip_lines; bool transactional_table; + THD::killed_state killed_status= THD::NOT_KILLED; DBUG_ENTER("mysql_load"); #ifdef EMBEDDED_LIBRARY @@ -403,7 +405,16 @@ bool mysql_load(THD *thd,sql_exchange *ex,TABLE_LIST *table_list, free_blobs(table); /* if pack_blob was used */ table->copy_blobs=0; thd->count_cuted_fields= CHECK_FIELD_IGNORE; - + /* + simulated killing in the middle of per-row loop + must be effective for binlogging + */ + DBUG_EXECUTE_IF("simulate_kill_bug27571", + { + error=1; + thd->killed= THD::KILL_QUERY; + };); + killed_status= (error == 0)? THD::NOT_KILLED : thd->killed; /* We must invalidate the table in query cache before binlog writing and ha_autocommit_... 
@@ -445,9 +456,10 @@ bool mysql_load(THD *thd,sql_exchange *ex,TABLE_LIST *table_list, /* If the file was not empty, wrote_create_file is true */ if (lf_info.wrote_create_file) { - if ((info.copied || info.deleted) && !transactional_table) + if (thd->transaction.stmt.modified_non_trans_table) write_execute_load_query_log_event(thd, handle_duplicates, - ignore, transactional_table); + ignore, transactional_table, + killed_status); else { Delete_file_log_event d(thd, db, transactional_table); @@ -492,8 +504,8 @@ bool mysql_load(THD *thd,sql_exchange *ex,TABLE_LIST *table_list, read_info.end_io_cache(); if (lf_info.wrote_create_file) { - write_execute_load_query_log_event(thd, handle_duplicates, - ignore, transactional_table); + write_execute_load_query_log_event(thd, handle_duplicates, ignore, + transactional_table,killed_status); } } } @@ -523,7 +535,8 @@ err: /* Not a very useful function; just to avoid duplication of code */ static bool write_execute_load_query_log_event(THD *thd, bool duplicates, bool ignore, - bool transactional_table) + bool transactional_table, + THD::killed_state killed_err_arg) { Execute_load_query_log_event e(thd, thd->query, thd->query_length, @@ -531,7 +544,7 @@ static bool write_execute_load_query_log_event(THD *thd, (char*)thd->lex->fname_end - (char*)thd->query, (duplicates == DUP_REPLACE) ? LOAD_DUP_REPLACE : (ignore ? LOAD_DUP_IGNORE : LOAD_DUP_ERROR), - transactional_table, FALSE); + transactional_table, FALSE, killed_err_arg); e.flags|= LOG_EVENT_UPDATE_TABLE_MAP_VERSION_F; return mysql_bin_log.write(&e); } diff --git a/sql/sql_parse.cc b/sql/sql_parse.cc index 7a777ba2bbd..7c00ac6d1c9 100644 --- a/sql/sql_parse.cc +++ b/sql/sql_parse.cc @@ -470,6 +470,46 @@ end: } +/** + @brief Check access privs for a MERGE table and fix children lock types. 
+ + @param[in] thd thread handle + @param[in] db database name + @param[in,out] table_list list of child tables (merge_list) + lock_type and optionally db set per table + + @return status + @retval 0 OK + @retval != 0 Error + + @detail + This function is used for write access to MERGE tables only + (CREATE TABLE, ALTER TABLE ... UNION=(...)). Set TL_WRITE for + every child. Set 'db' for every child if not present. +*/ +#ifndef NO_EMBEDDED_ACCESS_CHECKS +static bool check_merge_table_access(THD *thd, char *db, + TABLE_LIST *table_list) +{ + int error= 0; + + if (table_list) + { + /* Check that all tables use the current database */ + TABLE_LIST *tlist; + + for (tlist= table_list; tlist; tlist= tlist->next_local) + { + if (!tlist->db || !tlist->db[0]) + tlist->db= db; /* purecov: inspected */ + } + error= check_table_access(thd, SELECT_ACL | UPDATE_ACL | DELETE_ACL, + table_list,0); + } + return error; +} +#endif + /* This works because items are allocated with sql_alloc() */ void free_items(Item *item) @@ -748,12 +788,7 @@ bool dispatch_command(enum enum_server_command command, THD *thd, NET *net= &thd->net; bool error= 0; DBUG_ENTER("dispatch_command"); - - if (thd->killed == THD::KILL_QUERY || thd->killed == THD::KILL_BAD_DATA) - { - thd->killed= THD::NOT_KILLED; - thd->mysys_var->abort= 0; - } + DBUG_PRINT("info",("packet: '%*.s'; command: %d", packet_length, packet, command)); thd->command=command; /* @@ -2076,7 +2111,16 @@ mysql_execute_command(THD *thd) if (check_global_access(thd, SUPER_ACL | REPL_CLIENT_ACL)) goto error; pthread_mutex_lock(&LOCK_active_mi); - res = show_master_info(thd,active_mi); + if (active_mi != NULL) + { + res = show_master_info(thd, active_mi); + } + else + { + push_warning(thd, MYSQL_ERROR::WARN_LEVEL_WARN, 0, + "the master info structure does not exist"); + send_ok(thd); + } pthread_mutex_unlock(&LOCK_active_mi); break; } @@ -2250,6 +2294,19 @@ mysql_execute_command(THD *thd) select_lex->options|= SELECT_NO_UNLOCK; 
unit->set_limit(select_lex); + /* + Disable non-empty MERGE tables with CREATE...SELECT. Too + complicated. See Bug #26379. Empty MERGE tables are read-only + and don't allow CREATE...SELECT anyway. + */ + if (create_info.used_fields & HA_CREATE_USED_UNION) + { + my_error(ER_WRONG_OBJECT, MYF(0), create_table->db, + create_table->table_name, "BASE TABLE"); + res= 1; + goto end_with_restore_list; + } + if (!(create_info.options & HA_LEX_CREATE_TMP_TABLE)) { lex->link_first_table_back(create_table, link_to_local); @@ -2931,6 +2988,13 @@ end_with_restore_list: SELECT_NO_JOIN_CACHE | SELECT_NO_UNLOCK | OPTION_SETUP_TABLES_DONE, del_result, unit, select_lex); + res|= thd->net.report_error; + if (unlikely(res)) + { + /* If we had a another error reported earlier then this will be ignored */ + del_result->send_error(ER_UNKNOWN_ERROR, "Execution of the query failed"); + del_result->abort(); + } delete del_result; } else @@ -5072,26 +5136,6 @@ bool check_some_access(THD *thd, ulong want_access, TABLE_LIST *table) DBUG_RETURN(1); } - -bool check_merge_table_access(THD *thd, char *db, - TABLE_LIST *table_list) -{ - int error=0; - if (table_list) - { - /* Check that all tables use the current database */ - TABLE_LIST *tmp; - for (tmp= table_list; tmp; tmp= tmp->next_local) - { - if (!tmp->db || !tmp->db[0]) - tmp->db=db; - } - error=check_table_access(thd, SELECT_ACL | UPDATE_ACL | DELETE_ACL, - table_list,0); - } - return error; -} - #endif /*NO_EMBEDDED_ACCESS_CHECKS*/ /**************************************************************************** @@ -6271,24 +6315,23 @@ void add_join_natural(TABLE_LIST *a, TABLE_LIST *b, List<String> *using_fields, } -/* - Reload/resets privileges and the different caches. - - SYNOPSIS - reload_acl_and_cache() - thd Thread handler (can be NULL!) - options What should be reset/reloaded (tables, privileges, - slave...) 
- tables Tables to flush (if any) - write_to_binlog Depending on 'options', it may be very bad to write the - query to the binlog (e.g. FLUSH SLAVE); this is a - pointer where reload_acl_and_cache() will put 0 if - it thinks we really should not write to the binlog. - Otherwise it will put 1. - - RETURN - 0 ok - !=0 error. thd->killed or thd->is_error() is set +/** + @brief Reload/resets privileges and the different caches. + + @param thd Thread handler (can be NULL!) + @param options What should be reset/reloaded (tables, privileges, slave...) + @param tables Tables to flush (if any) + @param write_to_binlog True if we can write to the binlog. + + @note Depending on 'options', it may be very bad to write the + query to the binlog (e.g. FLUSH SLAVE); this is a + pointer where reload_acl_and_cache() will put 0 if + it thinks we really should not write to the binlog. + Otherwise it will put 1. + + @return Error status code + @retval 0 Ok + @retval !=0 Error; thd->killed is set or thd->is_error() is true */ bool reload_acl_and_cache(THD *thd, ulong options, TABLE_LIST *tables, @@ -6392,7 +6435,7 @@ bool reload_acl_and_cache(THD *thd, ulong options, TABLE_LIST *tables, for (; lock_p < end_p; lock_p++) { - if ((*lock_p)->type == TL_WRITE) + if ((*lock_p)->type >= TL_WRITE_ALLOW_WRITE) { my_error(ER_LOCK_OR_ACTIVE_TRANSACTION, MYF(0)); return 1; @@ -6962,8 +7005,15 @@ bool create_table_precheck(THD *thd, TABLE_LIST *tables, bool error= TRUE; // Error message is given DBUG_ENTER("create_table_precheck"); + /* + Require CREATE [TEMPORARY] privilege on new table; for + CREATE TABLE ... SELECT, also require INSERT. + */ + want_priv= ((lex->create_info.options & HA_LEX_CREATE_TMP_TABLE) ? - CREATE_TMP_ACL : CREATE_ACL); + CREATE_TMP_ACL : CREATE_ACL) | + (select_lex->item_list.elements ? 
INSERT_ACL : 0); + if (check_access(thd, want_priv, create_table->db, &create_table->grant.privilege, 0, 0, test(create_table->schema_table)) || diff --git a/sql/sql_partition.cc b/sql/sql_partition.cc index 1f365ac991b..67190c94a56 100644 --- a/sql/sql_partition.cc +++ b/sql/sql_partition.cc @@ -1402,7 +1402,7 @@ static void set_up_partition_func_pointers(partition_info *part_info) NONE */ -static void set_linear_hash_mask(partition_info *part_info, uint no_parts) +void set_linear_hash_mask(partition_info *part_info, uint no_parts) { uint mask; @@ -2834,8 +2834,8 @@ int get_partition_id_range(partition_info *part_info, loc_part_id++; *part_id= (uint32)loc_part_id; if (loc_part_id == max_partition && - range_array[loc_part_id] != LONGLONG_MAX && - part_func_value >= range_array[loc_part_id]) + part_func_value >= range_array[loc_part_id] && + !part_info->defined_max_value) DBUG_RETURN(HA_ERR_NO_PARTITION_FOUND); DBUG_PRINT("exit",("partition: %d", *part_id)); @@ -2941,7 +2941,13 @@ uint32 get_partition_id_range_for_endpoint(partition_info *part_info, } if (left_endpoint) { - if (part_func_value >= range_array[loc_part_id]) + longlong bound= range_array[loc_part_id]; + /* + In case of PARTITION p VALUES LESS THAN MAXVALUE + the maximum value is in the current partition. + */ + if (part_func_value > bound || + (part_func_value == bound && !part_info->defined_max_value)) loc_part_id++; } else @@ -4952,7 +4958,7 @@ the generated partition syntax in a correct manner. We use the old partitioning also for the new table. We do this by assigning the partition_info from the table loaded in - open_ltable to the partition_info struct used by mysql_create_table + open_table to the partition_info struct used by mysql_create_table later in this method. 
Case IIb: diff --git a/sql/sql_partition.h b/sql/sql_partition.h index 56f24181b93..282e24f1853 100644 --- a/sql/sql_partition.h +++ b/sql/sql_partition.h @@ -65,6 +65,7 @@ int get_part_for_delete(const uchar *buf, const uchar *rec0, void prune_partition_set(const TABLE *table, part_id_range *part_spec); bool check_partition_info(partition_info *part_info,handlerton **eng_type, TABLE *table, handler *file, HA_CREATE_INFO *info); +void set_linear_hash_mask(partition_info *part_info, uint no_parts); bool fix_partition_func(THD *thd, TABLE *table, bool create_table_ind); char *generate_partition_syntax(partition_info *part_info, uint *buf_length, bool use_sql_alloc, diff --git a/sql/sql_plugin.cc b/sql/sql_plugin.cc index d306ded7f4f..2a86844c8c6 100644 --- a/sql/sql_plugin.cc +++ b/sql/sql_plugin.cc @@ -181,6 +181,7 @@ public: TYPELIB* plugin_var_typelib(void); uchar* value_ptr(THD *thd, enum_var_type type, LEX_STRING *base); bool check(THD *thd, set_var *var); + bool check_default(enum_var_type type) { return is_readonly(); } void set_default(THD *thd, enum_var_type type); bool update(THD *thd, set_var *var); }; @@ -2215,9 +2216,11 @@ static st_bookmark *register_var(const char *plugin, const char *name, size= sizeof(int); break; case PLUGIN_VAR_LONG: + case PLUGIN_VAR_ENUM: size= sizeof(long); break; case PLUGIN_VAR_LONGLONG: + case PLUGIN_VAR_SET: size= sizeof(ulonglong); break; case PLUGIN_VAR_STR: @@ -2658,6 +2661,7 @@ void sys_var_pluginvar::set_default(THD *thd, enum_var_type type) if (is_readonly()) return; + pthread_mutex_lock(&LOCK_global_system_variables); tgt= real_value_ptr(thd, type); src= ((void **) (plugin_var + 1) + 1); @@ -2674,12 +2678,14 @@ void sys_var_pluginvar::set_default(THD *thd, enum_var_type type) if (!(plugin_var->flags & PLUGIN_VAR_THDLOCAL) || type == OPT_GLOBAL) { - pthread_mutex_lock(&LOCK_plugin); plugin_var->update(thd, plugin_var, tgt, src); - pthread_mutex_unlock(&LOCK_plugin); + 
pthread_mutex_unlock(&LOCK_global_system_variables); } else + { + pthread_mutex_unlock(&LOCK_global_system_variables); plugin_var->update(thd, plugin_var, tgt, src); + } } diff --git a/sql/sql_prepare.cc b/sql/sql_prepare.cc index b1b1502f015..9d4d62e57b6 100644 --- a/sql/sql_prepare.cc +++ b/sql/sql_prepare.cc @@ -2700,7 +2700,7 @@ Prepared_statement::Prepared_statement(THD *thd_arg, Protocol *protocol_arg) last_errno(0), flags((uint) IS_IN_USE) { - init_alloc_root(&main_mem_root, thd_arg->variables.query_alloc_block_size, + init_sql_alloc(&main_mem_root, thd_arg->variables.query_alloc_block_size, thd_arg->variables.query_prealloc_size); *last_error= '\0'; } diff --git a/sql/sql_rename.cc b/sql/sql_rename.cc index 750bcd50479..9dd8e1b70d4 100644 --- a/sql/sql_rename.cc +++ b/sql/sql_rename.cc @@ -51,6 +51,8 @@ bool mysql_rename_tables(THD *thd, TABLE_LIST *table_list, bool silent) DBUG_RETURN(1); } + mysql_ha_rm_tables(thd, table_list, FALSE); + if (wait_if_global_read_lock(thd,0,1)) DBUG_RETURN(1); diff --git a/sql/sql_repl.cc b/sql/sql_repl.cc index 0249af147b0..88040e2933c 100644 --- a/sql/sql_repl.cc +++ b/sql/sql_repl.cc @@ -369,7 +369,6 @@ void mysql_binlog_send(THD* thd, char* log_ident, my_off_t pos, name=0; // Find first log linfo.index_file_offset = 0; - thd->current_linfo = &linfo; if (mysql_bin_log.find_log_pos(&linfo, name, 1)) { @@ -378,6 +377,10 @@ void mysql_binlog_send(THD* thd, char* log_ident, my_off_t pos, goto err; } + pthread_mutex_lock(&LOCK_thread_count); + thd->current_linfo = &linfo; + pthread_mutex_unlock(&LOCK_thread_count); + if ((file=open_binlog(&log, log_file_name, &errmsg)) < 0) { my_errno= ER_MASTER_FATAL_ERROR_READING_BINLOG; @@ -1359,7 +1362,6 @@ bool mysql_show_binlog_events(THD* thd) name=0; // Find first log linfo.index_file_offset = 0; - thd->current_linfo = &linfo; if (mysql_bin_log.find_log_pos(&linfo, name, 1)) { @@ -1367,6 +1369,10 @@ bool mysql_show_binlog_events(THD* thd) goto err; } + 
pthread_mutex_lock(&LOCK_thread_count); + thd->current_linfo = &linfo; + pthread_mutex_unlock(&LOCK_thread_count); + if ((file=open_binlog(&log, linfo.log_file_name, &errmsg)) < 0) goto err; diff --git a/sql/sql_select.cc b/sql/sql_select.cc index fab46b4e1eb..3a142568080 100644 --- a/sql/sql_select.cc +++ b/sql/sql_select.cc @@ -3728,7 +3728,7 @@ update_ref_and_keys(THD *thd, DYNAMIC_ARRAY *keyuse,JOIN_TAB *join_tab, { KEYUSE key_end,*prev,*save_pos,*use; - qsort(keyuse->buffer,keyuse->elements,sizeof(KEYUSE), + my_qsort(keyuse->buffer,keyuse->elements,sizeof(KEYUSE), (qsort_cmp) sort_keyuse); bzero((char*) &key_end,sizeof(key_end)); /* Add for easy testing */ @@ -3739,7 +3739,7 @@ update_ref_and_keys(THD *thd, DYNAMIC_ARRAY *keyuse,JOIN_TAB *join_tab, found_eq_constant=0; for (i=0 ; i < keyuse->elements-1 ; i++,use++) { - if (!use->used_tables) + if (!use->used_tables && use->optimize != KEY_OPTIMIZE_REF_OR_NULL) use->table->const_key_parts[use->key]|= use->keypart_map; if (use->keypart != FT_KEYPART) { @@ -4497,8 +4497,9 @@ choose_plan(JOIN *join, table_map join_tables) Apply heuristic: pre-sort all access plans with respect to the number of records accessed. */ - qsort(join->best_ref + join->const_tables, join->tables - join->const_tables, - sizeof(JOIN_TAB*), straight_join?join_tab_cmp_straight:join_tab_cmp); + my_qsort(join->best_ref + join->const_tables, + join->tables - join->const_tables, sizeof(JOIN_TAB*), + straight_join ? join_tab_cmp_straight : join_tab_cmp); if (straight_join) { @@ -4545,6 +4546,17 @@ choose_plan(JOIN *join, table_map join_tables) ptr1 pointer to first JOIN_TAB object ptr2 pointer to second JOIN_TAB object + NOTES + The order relation implemented by join_tab_cmp() is not transitive, + i.e. it is possible to choose such a, b and c that (a < b) && (b < c) + but (c < a). This implies that result of a sort using the relation + implemented by join_tab_cmp() depends on the order in which + elements are compared, i.e. 
the result is implementation-specific. + Example: + a: dependent = 0x0 table->map = 0x1 found_records = 3 ptr = 0x907e6b0 + b: dependent = 0x0 table->map = 0x2 found_records = 3 ptr = 0x907e838 + c: dependent = 0x6 table->map = 0x10 found_records = 2 ptr = 0x907ecd0 + RETURN 1 if first is bigger -1 if second is bigger @@ -6445,7 +6457,15 @@ make_join_readinfo(JOIN *join, ulonglong options) else if (!table->covering_keys.is_clear_all() && !(tab->select && tab->select->quick)) { // Only read index tree - tab->index=find_shortest_key(table, & table->covering_keys); + /* + See bug #26447: "Using the clustered index for a table scan + is always faster than using a secondary index". + */ + if (table->s->primary_key != MAX_KEY && + table->file->primary_key_is_clustered()) + tab->index= table->s->primary_key; + else + tab->index=find_shortest_key(table, & table->covering_keys); tab->read_first_record= join_read_first; tab->type=JT_NEXT; // Read with index_first / index_next } @@ -9206,9 +9226,43 @@ static Field *create_tmp_field_from_item(THD *thd, Item *item, TABLE *table, new_field->set_derivation(item->collation.derivation); break; case DECIMAL_RESULT: - new_field= new Field_new_decimal(item->max_length, maybe_null, item->name, - item->decimals, item->unsigned_flag); + { + uint8 dec= item->decimals; + uint8 intg= ((Item_decimal *) item)->decimal_precision() - dec; + uint32 len= item->max_length; + + /* + Trying to put too many digits overall in a DECIMAL(prec,dec) + will always throw a warning. We must limit dec to + DECIMAL_MAX_SCALE however to prevent an assert() later. + */ + + if (dec > 0) + { + signed int overflow; + + dec= min(dec, DECIMAL_MAX_SCALE); + + /* + If the value still overflows the field with the corrected dec, + we'll throw out decimals rather than integers. This is still + bad and of course throws a truncation warning. 
+ +1: for decimal point + */ + + overflow= my_decimal_precision_to_length(intg + dec, dec, + item->unsigned_flag) - len; + + if (overflow > 0) + dec= max(0, dec - overflow); // too long, discard fract + else + len -= item->decimals - dec; // corrected value fits + } + + new_field= new Field_new_decimal(len, maybe_null, item->name, + dec, item->unsigned_flag); break; + } case ROW_RESULT: default: // This case should never be choosen @@ -9629,7 +9683,7 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields, table->keys_in_use_for_query.init(); table->s= share; - init_tmp_table_share(share, "", 0, tmpname, tmpname); + init_tmp_table_share(thd, share, "", 0, tmpname, tmpname); share->blob_field= blob_field; share->blob_ptr_size= mi_portable_sizeof_char_ptr; share->db_low_byte_first=1; // True for HEAP and MyISAM diff --git a/sql/sql_show.cc b/sql/sql_show.cc index a347482859f..9a7d7c59af3 100644 --- a/sql/sql_show.cc +++ b/sql/sql_show.cc @@ -29,6 +29,8 @@ #include "event_data_objects.h" #include <my_dir.h> +#define STR_OR_NIL(S) ((S) ? 
(S) : "<nil>") + #ifdef WITH_PARTITION_STORAGE_ENGINE #include "ha_partition.h" #endif @@ -3135,8 +3137,8 @@ int get_all_tables(THD *thd, TABLE_LIST *tables, COND *cond) goto err; } DBUG_PRINT("INDEX VALUES",("db_name='%s', table_name='%s'", - lookup_field_vals.db_value.str, - lookup_field_vals.table_value.str)); + STR_OR_NIL(lookup_field_vals.db_value.str), + STR_OR_NIL(lookup_field_vals.table_value.str))); if (!lookup_field_vals.wild_db_value && !lookup_field_vals.wild_table_value) { diff --git a/sql/sql_table.cc b/sql/sql_table.cc index f179d8bea1e..dfb44f0d444 100644 --- a/sql/sql_table.cc +++ b/sql/sql_table.cc @@ -1552,6 +1552,8 @@ int mysql_rm_table_part2(THD *thd, TABLE_LIST *tables, bool if_exists, built_query.append("DROP TABLE "); } + mysql_ha_rm_tables(thd, tables, FALSE); + pthread_mutex_lock(&LOCK_open); /* @@ -1593,7 +1595,9 @@ int mysql_rm_table_part2(THD *thd, TABLE_LIST *tables, bool if_exists, handlerton *table_type; enum legacy_db_type frm_db_type; - mysql_ha_flush(thd, table, MYSQL_HA_CLOSE_FINAL, 1); + DBUG_PRINT("table", ("table_l: '%s'.'%s' table: 0x%lx s: 0x%lx", + table->db, table->table_name, (long) table->table, + table->table ? (long) table->table->s : (long) -1)); error= drop_temporary_table(thd, table); @@ -1603,13 +1607,7 @@ int mysql_rm_table_part2(THD *thd, TABLE_LIST *tables, bool if_exists, tmp_table_deleted= 1; continue; case -1: - // table already in use - /* - XXX: This branch should never be taken outside of SF, trigger or - prelocked mode. - - DBUG_ASSERT(thd->in_sub_stmt); - */ + DBUG_ASSERT(thd->in_sub_stmt); error= 1; goto err_with_placeholders; default: @@ -1722,6 +1720,8 @@ int mysql_rm_table_part2(THD *thd, TABLE_LIST *tables, bool if_exists, wrong_tables.append(','); wrong_tables.append(String(table->table_name,system_charset_info)); } + DBUG_PRINT("table", ("table: 0x%lx s: 0x%lx", (long) table->table, + table->table ? 
(long) table->table->s : (long) -1)); } /* It's safe to unlock LOCK_open: we have an exclusive lock @@ -1830,7 +1830,8 @@ bool quick_rm_table(handlerton *base,const char *db, /* Sort keys in the following order: - PRIMARY KEY - - UNIQUE keyws where all column are NOT NULL + - UNIQUE keys where all column are NOT NULL + - UNIQUE keys that don't contain partial segments - Other UNIQUE keys - Normal keys - Fulltext keys @@ -1841,26 +1842,31 @@ bool quick_rm_table(handlerton *base,const char *db, static int sort_keys(KEY *a, KEY *b) { - if (a->flags & HA_NOSAME) + ulong a_flags= a->flags, b_flags= b->flags; + + if (a_flags & HA_NOSAME) { - if (!(b->flags & HA_NOSAME)) + if (!(b_flags & HA_NOSAME)) return -1; - if ((a->flags ^ b->flags) & (HA_NULL_PART_KEY | HA_END_SPACE_KEY)) + if ((a_flags ^ b_flags) & (HA_NULL_PART_KEY | HA_END_SPACE_KEY)) { /* Sort NOT NULL keys before other keys */ - return (a->flags & (HA_NULL_PART_KEY | HA_END_SPACE_KEY)) ? 1 : -1; + return (a_flags & (HA_NULL_PART_KEY | HA_END_SPACE_KEY)) ? 1 : -1; } if (a->name == primary_key_name) return -1; if (b->name == primary_key_name) return 1; + /* Sort keys don't containing partial segments before others */ + if ((a_flags ^ b_flags) & HA_KEY_HAS_PART_KEY_SEG) + return (a_flags & HA_KEY_HAS_PART_KEY_SEG) ? 1 : -1; } - else if (b->flags & HA_NOSAME) + else if (b_flags & HA_NOSAME) return 1; // Prefer b - if ((a->flags ^ b->flags) & HA_FULLTEXT) + if ((a_flags ^ b_flags) & HA_FULLTEXT) { - return (a->flags & HA_FULLTEXT) ? 1 : -1; + return (a_flags & HA_FULLTEXT) ? 1 : -1; } /* Prefer original key order. 
usable_key_parts contains here @@ -2924,6 +2930,10 @@ mysql_prepare_create_table(THD *thd, HA_CREATE_INFO *create_info, else key_info->flags|= HA_PACK_KEY; } + /* Check if the key segment is partial, set the key flag accordingly */ + if (length != sql_field->key_length) + key_info->flags|= HA_KEY_HAS_PART_KEY_SEG; + key_length+=length; key_part_info++; @@ -2979,8 +2989,8 @@ mysql_prepare_create_table(THD *thd, HA_CREATE_INFO *create_info, DBUG_RETURN(TRUE); } /* Sort keys in optimized order */ - qsort((uchar*) *key_info_buffer, *key_count, sizeof(KEY), - (qsort_cmp) sort_keys); + my_qsort((uchar*) *key_info_buffer, *key_count, sizeof(KEY), + (qsort_cmp) sort_keys); create_info->null_bits= null_fields; DBUG_RETURN(FALSE); @@ -3737,14 +3747,16 @@ mysql_rename_table(handlerton *base, const char *old_db, Win32 clients must also have a WRITE LOCK on the table ! */ -static void wait_while_table_is_used(THD *thd,TABLE *table, - enum ha_extra_function function) +void wait_while_table_is_used(THD *thd, TABLE *table, + enum ha_extra_function function) { DBUG_ENTER("wait_while_table_is_used"); DBUG_PRINT("enter", ("table: '%s' share: 0x%lx db_stat: %u version: %lu", table->s->table_name.str, (ulong) table->s, table->db_stat, table->s->version)); + safe_mutex_assert_owner(&LOCK_open); + VOID(table->file->extra(function)); /* Mark all tables that are in use as 'old' */ mysql_lock_abort(thd, table, TRUE); /* end threads waiting on lock */ @@ -3868,6 +3880,8 @@ static int prepare_for_restore(THD* thd, TABLE_LIST* table, DBUG_RETURN(send_check_errmsg(thd, table, "restore", "Failed to open partially restored table")); } + /* A MERGE table must not come here. */ + DBUG_ASSERT(!table->table || !table->table->child_l); pthread_mutex_unlock(&LOCK_open); DBUG_RETURN(0); } @@ -3910,6 +3924,10 @@ static int prepare_for_repair(THD *thd, TABLE_LIST *table_list, table= &tmp_table; pthread_mutex_unlock(&LOCK_open); } + + /* A MERGE table must not come here. 
*/ + DBUG_ASSERT(!table->child_l); + /* REPAIR TABLE ... USE_FRM for temporary tables makes little sense. */ @@ -4057,13 +4075,16 @@ static bool mysql_admin_table(THD* thd, TABLE_LIST* tables, Protocol::SEND_NUM_ROWS | Protocol::SEND_EOF)) DBUG_RETURN(TRUE); - mysql_ha_flush(thd, tables, MYSQL_HA_CLOSE_FINAL, FALSE); + mysql_ha_rm_tables(thd, tables, FALSE); + for (table= tables; table; table= table->next_local) { char table_name[NAME_LEN*2+2]; char* db = table->db; bool fatal_error=0; + DBUG_PRINT("admin", ("table: '%s'.'%s'", table->db, table->table_name)); + DBUG_PRINT("admin", ("extra_open_options: %u", extra_open_options)); strxmov(table_name, db, ".", table->table_name, NullS); thd->open_options|= extra_open_options; table->lock_type= lock_type; @@ -4094,16 +4115,24 @@ static bool mysql_admin_table(THD* thd, TABLE_LIST* tables, table->next_local= save_next_local; thd->open_options&= ~extra_open_options; } + DBUG_PRINT("admin", ("table: 0x%lx", (long) table->table)); + if (prepare_func) { + DBUG_PRINT("admin", ("calling prepare_func")); switch ((*prepare_func)(thd, table, check_opt)) { case 1: // error, message written to net ha_autocommit_or_rollback(thd, 1); close_thread_tables(thd); + DBUG_PRINT("admin", ("simple error, admin next table")); continue; case -1: // error, message could be written to net + /* purecov: begin inspected */ + DBUG_PRINT("admin", ("severe error, stop")); goto err; + /* purecov: end */ default: // should be 0 otherwise + DBUG_PRINT("admin", ("prepare_func succeeded")); ; } } @@ -4118,6 +4147,7 @@ static bool mysql_admin_table(THD* thd, TABLE_LIST* tables, */ if (!table->table) { + DBUG_PRINT("admin", ("open table failed")); if (!thd->warn_list.elements) push_warning(thd, MYSQL_ERROR::WARN_LEVEL_ERROR, ER_CHECK_NO_SUCH_TABLE, ER(ER_CHECK_NO_SUCH_TABLE)); @@ -4132,14 +4162,17 @@ static bool mysql_admin_table(THD* thd, TABLE_LIST* tables, if (table->view) { + DBUG_PRINT("admin", ("calling view_operator_func")); result_code= 
(*view_operator_func)(thd, table); goto send_result; } if ((table->table->db_stat & HA_READ_ONLY) && open_for_modify) { + /* purecov: begin inspected */ char buff[FN_REFLEN + MYSQL_ERRMSG_SIZE]; uint length; + DBUG_PRINT("admin", ("sending error message")); protocol->prepare_for_resend(); protocol->store(table_name, system_charset_info); protocol->store(operator_name, system_charset_info); @@ -4154,11 +4187,13 @@ static bool mysql_admin_table(THD* thd, TABLE_LIST* tables, if (protocol->write()) goto err; continue; + /* purecov: end */ } /* Close all instances of the table to allow repair to rename files */ if (lock_type == TL_WRITE && table->table->s->version) { + DBUG_PRINT("admin", ("removing table from cache")); pthread_mutex_lock(&LOCK_open); const char *old_message=thd->enter_cond(&COND_refresh, &LOCK_open, "Waiting to get writelock"); @@ -4178,6 +4213,8 @@ static bool mysql_admin_table(THD* thd, TABLE_LIST* tables, if (table->table->s->crashed && operator_func == &handler::ha_check) { + /* purecov: begin inspected */ + DBUG_PRINT("admin", ("sending crashed warning")); protocol->prepare_for_resend(); protocol->store(table_name, system_charset_info); protocol->store(operator_name, system_charset_info); @@ -4186,6 +4223,7 @@ static bool mysql_admin_table(THD* thd, TABLE_LIST* tables, system_charset_info); if (protocol->write()) goto err; + /* purecov: end */ } if (operator_func == &handler::ha_repair && @@ -4196,6 +4234,7 @@ static bool mysql_admin_table(THD* thd, TABLE_LIST* tables, HA_ADMIN_NEEDS_ALTER)) { my_bool save_no_send_ok= thd->net.no_send_ok; + DBUG_PRINT("admin", ("recreating table")); ha_autocommit_or_rollback(thd, 1); close_thread_tables(thd); tmp_disable_binlog(thd); // binlogging is done by caller if wanted @@ -4208,7 +4247,9 @@ static bool mysql_admin_table(THD* thd, TABLE_LIST* tables, } + DBUG_PRINT("admin", ("calling operator_func '%s'", operator_name)); result_code = (table->table->file->*operator_func)(thd, check_opt); + DBUG_PRINT("admin", 
("operator_func returned: %d", result_code)); send_result: @@ -5855,8 +5896,7 @@ bool mysql_alter_table(THD *thd,char *new_db, char *new_name, build_table_filename(reg_path, sizeof(reg_path), db, table_name, reg_ext, 0); build_table_filename(path, sizeof(path), db, table_name, "", 0); - - mysql_ha_flush(thd, table_list, MYSQL_HA_CLOSE_FINAL, FALSE); + mysql_ha_rm_tables(thd, table_list, FALSE); /* DISCARD/IMPORT TABLESPACE is always alone in an ALTER TABLE */ if (alter_info->tablespace_op != NO_TABLESPACE_OP) @@ -5923,10 +5963,25 @@ view_err: start_waiting_global_read_lock(thd); DBUG_RETURN(error); } - if (!(table=open_ltable(thd, table_list, TL_WRITE_ALLOW_READ, 0))) + + if (!(table= open_n_lock_single_table(thd, table_list, TL_WRITE_ALLOW_READ))) DBUG_RETURN(TRUE); table->use_all_columns(); + /* + Prohibit changing of the UNION list of a non-temporary MERGE table + under LOCK tables. It would be quite difficult to reuse a shrinked + set of tables from the old table or to open a new TABLE object for + an extended list and verify that they belong to locked tables. + */ + if (thd->locked_tables && + (create_info->used_fields & HA_CREATE_USED_UNION) && + (table->s->tmp_table == NO_TMP_TABLE)) + { + my_error(ER_LOCK_OR_ACTIVE_TRANSACTION, MYF(0)); + DBUG_RETURN(TRUE); + } + /* Check that we are not trying to rename to an existing table */ if (new_name) { @@ -6013,7 +6068,8 @@ view_err: goto err; new_db_type= create_info->db_type; - if (new_db_type != old_db_type && + if ((new_db_type != old_db_type || + alter_info->flags & ALTER_PARTITION) && !table->file->can_switch_engines()) { my_error(ER_ROW_IS_REFERENCED, MYF(0)); @@ -6394,6 +6450,7 @@ view_err: goto err; /* Open the table if we need to copy the data. 
*/ + DBUG_PRINT("info", ("need_copy_table: %u", need_copy_table)); if (need_copy_table != ALTER_TABLE_METADATA_ONLY) { if (table->s->tmp_table) @@ -6417,6 +6474,10 @@ view_err: } if (!new_table) goto err1; + /* + Note: In case of MERGE table, we do not attach children. We do not + copy data for MERGE tables. Only the children have data. + */ } /* Copy the data if necessary. */ @@ -6424,6 +6485,10 @@ view_err: thd->cuted_fields=0L; thd->proc_info="copy to tmp table"; copied=deleted=0; + /* + We do not copy data for MERGE tables. Only the children have data. + MERGE tables have HA_NO_COPY_ON_ALTER set. + */ if (new_table && !(new_table->file->ha_table_flags() & HA_NO_COPY_ON_ALTER)) { /* We don't want update TIMESTAMP fields during ALTER TABLE. */ @@ -6561,7 +6626,10 @@ view_err: if (new_table) { - /* Close the intermediate table that will be the new table */ + /* + Close the intermediate table that will be the new table. + Note that MERGE tables do not have their children attached here. + */ intern_close_table(new_table); my_free(new_table,MYF(0)); } @@ -6654,6 +6722,7 @@ view_err: /* Now we have to inform handler that new .FRM file is in place. To do this we need to obtain a handler object for it. + NO need to tamper with MERGE tables. The real open is done later. */ TABLE *t_table; if (new_name != table_name || new_db != db) @@ -6721,7 +6790,7 @@ view_err: /* For the alter table to be properly flushed to the logs, we have to open the new table. If not, we get a problem on server - shutdown. + shutdown. But we do not need to attach MERGE children. 
*/ char path[FN_REFLEN]; TABLE *t_table; @@ -6910,23 +6979,35 @@ copy_data_between_tables(TABLE *from,TABLE *to, if (order) { - from->sort.io_cache=(IO_CACHE*) my_malloc(sizeof(IO_CACHE), - MYF(MY_FAE | MY_ZEROFILL)); - bzero((char*) &tables,sizeof(tables)); - tables.table= from; - tables.alias= tables.table_name= from->s->table_name.str; - tables.db= from->s->db.str; - error=1; + if (to->s->primary_key != MAX_KEY && to->file->primary_key_is_clustered()) + { + char warn_buff[MYSQL_ERRMSG_SIZE]; + my_snprintf(warn_buff, sizeof(warn_buff), + "ORDER BY ignored as there is a user-defined clustered index" + " in the table '%-.192s'", from->s->table_name.str); + push_warning(thd, MYSQL_ERROR::WARN_LEVEL_WARN, ER_UNKNOWN_ERROR, + warn_buff); + } + else + { + from->sort.io_cache=(IO_CACHE*) my_malloc(sizeof(IO_CACHE), + MYF(MY_FAE | MY_ZEROFILL)); + bzero((char *) &tables, sizeof(tables)); + tables.table= from; + tables.alias= tables.table_name= from->s->table_name.str; + tables.db= from->s->db.str; + error= 1; - if (thd->lex->select_lex.setup_ref_array(thd, order_num) || - setup_order(thd, thd->lex->select_lex.ref_pointer_array, - &tables, fields, all_fields, order) || - !(sortorder=make_unireg_sortorder(order, &length, NULL)) || - (from->sort.found_records = filesort(thd, from, sortorder, length, - (SQL_SELECT *) 0, HA_POS_ERROR, 1, - &examined_rows)) == - HA_POS_ERROR) - goto err; + if (thd->lex->select_lex.setup_ref_array(thd, order_num) || + setup_order(thd, thd->lex->select_lex.ref_pointer_array, + &tables, fields, all_fields, order) || + !(sortorder= make_unireg_sortorder(order, &length, NULL)) || + (from->sort.found_records= filesort(thd, from, sortorder, length, + (SQL_SELECT *) 0, HA_POS_ERROR, + 1, &examined_rows)) == + HA_POS_ERROR) + goto err; + } }; /* Tell handler that we have values for all columns in the to table */ @@ -7051,6 +7132,12 @@ bool mysql_recreate_table(THD *thd, TABLE_LIST *table_list) Alter_info alter_info; DBUG_ENTER("mysql_recreate_table"); 
+ DBUG_ASSERT(!table_list->next_global); + /* + table_list->table has been closed and freed. Do not reference + uninitialized data. open_tables() could fail. + */ + table_list->table= NULL; bzero((char*) &create_info, sizeof(create_info)); create_info.db_type= 0; @@ -7082,6 +7169,7 @@ bool mysql_checksum_table(THD *thd, TABLE_LIST *tables, Protocol::SEND_NUM_ROWS | Protocol::SEND_EOF)) DBUG_RETURN(TRUE); + /* Open one table after the other to keep lock time as short as possible. */ for (table= tables; table; table= table->next_local) { char table_name[NAME_LEN*2+2]; @@ -7089,7 +7177,7 @@ bool mysql_checksum_table(THD *thd, TABLE_LIST *tables, strxmov(table_name, table->db ,".", table->table_name, NullS); - t= table->table= open_ltable(thd, table, TL_READ, 0); + t= table->table= open_n_lock_single_table(thd, table, TL_READ); thd->clear_error(); // these errors shouldn't get client protocol->prepare_for_resend(); diff --git a/sql/sql_trigger.cc b/sql/sql_trigger.cc index ce26b025430..b421f57b7ab 100644 --- a/sql/sql_trigger.cc +++ b/sql/sql_trigger.cc @@ -323,6 +323,7 @@ bool mysql_create_or_drop_trigger(THD *thd, TABLE_LIST *tables, bool create) TABLE *table; bool result= TRUE; String stmt_query; + bool need_start_waiting= FALSE; DBUG_ENTER("mysql_create_or_drop_trigger"); @@ -374,10 +375,12 @@ bool mysql_create_or_drop_trigger(THD *thd, TABLE_LIST *tables, bool create) /* We don't want perform our operations while global read lock is held so we have to wait until its end and then prevent it from occurring - again until we are done. (Acquiring LOCK_open is not enough because - global read lock is held without holding LOCK_open). + again until we are done, unless we are under lock tables. (Acquiring + LOCK_open is not enough because global read lock is held without holding + LOCK_open). 
*/ - if (wait_if_global_read_lock(thd, 0, 1)) + if (!thd->locked_tables && + !(need_start_waiting= !wait_if_global_read_lock(thd, 0, 1))) DBUG_RETURN(TRUE); VOID(pthread_mutex_lock(&LOCK_open)); @@ -433,16 +436,30 @@ bool mysql_create_or_drop_trigger(THD *thd, TABLE_LIST *tables, bool create) goto end; } - if (lock_table_names(thd, tables)) - goto end; - /* We also don't allow creation of triggers on views. */ tables->required_type= FRMTYPE_TABLE; - if (reopen_name_locked_table(thd, tables, TRUE)) + /* Keep consistent with respect to other DDL statements */ + mysql_ha_rm_tables(thd, tables, TRUE); + + if (thd->locked_tables) { - unlock_table_name(thd, tables); - goto end; + /* Table must be write locked */ + if (name_lock_locked_table(thd, tables)) + goto end; + } + else + { + /* Grab the name lock and insert the placeholder*/ + if (lock_table_names(thd, tables)) + goto end; + + /* Convert the placeholder to a real table */ + if (reopen_name_locked_table(thd, tables, TRUE)) + { + unlock_table_name(thd, tables); + goto end; + } } table= tables->table; @@ -462,6 +479,26 @@ bool mysql_create_or_drop_trigger(THD *thd, TABLE_LIST *tables, bool create) table->triggers->create_trigger(thd, tables, &stmt_query): table->triggers->drop_trigger(thd, tables, &stmt_query)); + /* Under LOCK TABLES we must reopen the table to activate the trigger. */ + if (!result && thd->locked_tables) + { + /* Make table suitable for reopening */ + close_data_files_and_morph_locks(thd, tables->db, tables->table_name); + thd->in_lock_tables= 1; + if (reopen_tables(thd, 1, 1)) + { + /* To be safe remove this table from the set of LOCKED TABLES */ + unlink_open_table(thd, tables->table, FALSE); + + /* + Ignore reopen_tables errors for now. It's better not leave master/slave + in a inconsistent state. 
+ */ + thd->clear_error(); + } + thd->in_lock_tables= 0; + } + end: if (!result) @@ -470,7 +507,9 @@ end: } VOID(pthread_mutex_unlock(&LOCK_open)); - start_waiting_global_read_lock(thd); + + if (need_start_waiting) + start_waiting_global_read_lock(thd); if (!result) send_ok(thd); diff --git a/sql/sql_update.cc b/sql/sql_update.cc index 46022c9f743..ecb7acda61b 100644 --- a/sql/sql_update.cc +++ b/sql/sql_update.cc @@ -203,6 +203,7 @@ int mysql_update(THD *thd, bool need_reopen; ulonglong id; List<Item> all_fields; + THD::killed_state killed_status= THD::NOT_KILLED; DBUG_ENTER("mysql_update"); for ( ; ; ) @@ -714,45 +715,25 @@ int mysql_update(THD *thd, thd->row_count++; } dup_key_found= 0; - - if (!transactional_table && updated > 0) - thd->transaction.stmt.modified_non_trans_table= TRUE; - - /* - todo bug#27571: to avoid asynchronization of `error' and - `error_code' of binlog event constructor - - The concept, which is a bit different for insert(!), is to - replace `error' assignment with the following lines - - killed_status= thd->killed; // get the status of the volatile - - Notice: thd->killed is type of "state" whereas the lhs has - "status" the suffix which translates according to WordNet: a state - at a particular time - at the time of the end of per-row loop in - our case. Binlogging ops are conducted with the status. - - error= (killed_status == THD::NOT_KILLED)? error : 1; - - which applies to most mysql_$query functions. - Event's constructor will accept `killed_status' as an argument: - - Query_log_event qinfo(..., killed_status); - - thd->killed might be changed after killed_status had got cached and this - won't affect binlogging event but other effects remain. - - Open issue: In a case the error happened not because of KILLED - - and then KILLED was caught later still within the loop - we shall - do something to avoid binlogging of incorrect ER_SERVER_SHUTDOWN - error_code. 
+ Caching the killed status to pass as the arg to query event constuctor; + The cached value can not change whereas the killed status can + (externally) since this point and change of the latter won't affect + binlogging. + It's assumed that if an error was set in combination with an effective + killed status then the error is due to killing. */ - - if (thd->killed && !error) - error= 1; // Aborted - else if (will_batch && - (loc_error= table->file->exec_bulk_update(&dup_key_found))) + killed_status= thd->killed; // get the status of the volatile + // simulated killing after the loop must be ineffective for binlogging + DBUG_EXECUTE_IF("simulate_kill_bug27571", + { + thd->killed= THD::KILL_QUERY; + };); + error= (killed_status == THD::NOT_KILLED)? error : 1; + + if (error && + will_batch && + (loc_error= table->file->exec_bulk_update(&dup_key_found))) /* An error has occurred when a batched update was performed and returned an error indication. It cannot be an allowed duplicate key error since @@ -774,6 +755,10 @@ int mysql_update(THD *thd, if (will_batch) table->file->end_bulk_update(); table->file->try_semi_consistent_read(0); + + if (!transactional_table && updated > 0) + thd->transaction.stmt.modified_non_trans_table= TRUE; + end_read_record(&info); delete select; thd->proc_info= "end"; @@ -797,7 +782,7 @@ int mysql_update(THD *thd, Sometimes we want to binlog even if we updated no rows, in case user used it to be sure master and slave are in same state. 
*/ - if ((error < 0) || (updated && !transactional_table)) + if ((error < 0) || thd->transaction.stmt.modified_non_trans_table) { if (mysql_bin_log.is_open()) { @@ -805,7 +790,7 @@ int mysql_update(THD *thd, thd->clear_error(); if (thd->binlog_query(THD::ROW_QUERY_TYPE, thd->query, thd->query_length, - transactional_table, FALSE) && + transactional_table, FALSE, killed_status) && transactional_table) { error=1; // Rollback update @@ -1215,8 +1200,8 @@ multi_update::multi_update(TABLE_LIST *table_list, :all_tables(table_list), leaves(leaves_list), update_tables(0), tmp_tables(0), updated(0), found(0), fields(field_list), values(value_list), table_count(0), copy_field(0), - handle_duplicates(handle_duplicates_arg), do_update(1), trans_safe(0), - transactional_tables(1), ignore(ignore_arg) + handle_duplicates(handle_duplicates_arg), do_update(1), trans_safe(1), + transactional_tables(1), ignore(ignore_arg), error_handled(0) {} @@ -1418,7 +1403,6 @@ multi_update::initialize_tables(JOIN *join) if ((thd->options & OPTION_SAFE_UPDATES) && error_if_full_join(join)) DBUG_RETURN(1); main_table=join->join_tab->table; - trans_safe= transactional_tables= main_table->file->has_transactions(); table_to_update= 0; /* Any update has at least one pair (field, value) */ @@ -1713,12 +1697,14 @@ void multi_update::send_error(uint errcode,const char *err) /* First send error what ever it is ... 
*/ my_error(errcode, MYF(0), err); - /* If nothing updated return */ - if (updated == 0) /* the counter might be reset in send_eof */ - return; /* and then the query has been binlogged */ + /* the error was handled or nothing deleted and no side effects return */ + if (error_handled || + !thd->transaction.stmt.modified_non_trans_table && !updated) + return; /* Something already updated so we have to invalidate cache */ - query_cache_invalidate3(thd, update_tables, 1); + if (updated) + query_cache_invalidate3(thd, update_tables, 1); /* If all tables that has been updated are trans safe then just do rollback. If not attempt to do remaining updates. @@ -1750,12 +1736,16 @@ void multi_update::send_error(uint errcode,const char *err) */ if (mysql_bin_log.is_open()) { + /* + THD::killed status might not have been set ON at time of an error + got caught and if happens later the killed error is written + into repl event. + */ thd->binlog_query(THD::ROW_QUERY_TYPE, thd->query, thd->query_length, transactional_tables, FALSE); } - if (!trans_safe) - thd->transaction.all.modified_non_trans_table= TRUE; + thd->transaction.all.modified_non_trans_table= TRUE; } DBUG_ASSERT(trans_safe || !updated || thd->transaction.stmt.modified_non_trans_table); @@ -1947,11 +1937,20 @@ bool multi_update::send_eof() { char buff[STRING_BUFFER_USUAL_SIZE]; ulonglong id; + THD::killed_state killed_status= THD::NOT_KILLED; DBUG_ENTER("multi_update::send_eof"); thd->proc_info="updating reference tables"; - /* Does updates for the last n - 1 tables, returns 0 if ok */ + /* + Does updates for the last n - 1 tables, returns 0 if ok; + error takes into account killed status gained in do_updates() + */ int local_error = (table_count) ? do_updates(0) : 0; + /* + if local_error is not set ON until after do_updates() then + later carried out killing should not affect binlogging. + */ + killed_status= (local_error == 0)? 
THD::NOT_KILLED : thd->killed; thd->proc_info= "end"; /* We must invalidate the query cache before binlog writing and @@ -1978,11 +1977,9 @@ bool multi_update::send_eof() { if (local_error == 0) thd->clear_error(); - else - updated= 0; /* if there's an error binlog it here not in ::send_error */ if (thd->binlog_query(THD::ROW_QUERY_TYPE, thd->query, thd->query_length, - transactional_tables, FALSE) && + transactional_tables, FALSE, killed_status) && trans_safe) { local_error= 1; // Rollback update @@ -1991,6 +1988,8 @@ bool multi_update::send_eof() if (thd->transaction.stmt.modified_non_trans_table) thd->transaction.all.modified_non_trans_table= TRUE; } + if (local_error != 0) + error_handled= TRUE; // to force early leave from ::send_error() if (transactional_tables) { diff --git a/sql/sql_yacc.yy b/sql/sql_yacc.yy index 89e21cc27a5..6d2e4ea9a59 100644 --- a/sql/sql_yacc.yy +++ b/sql/sql_yacc.yy @@ -508,10 +508,10 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); %pure_parser /* We have threads */ /* - Currently there are 280 shift/reduce conflicts. + Currently there are 177 shift/reduce conflicts. We should not introduce new conflicts any more. */ -%expect 280 +%expect 177 /* Comments for TOKENS. 
@@ -1157,7 +1157,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); variable variable_aux bool_pri predicate bit_expr table_wild simple_expr udf_expr - expr_or_default set_expr_or_default interval_expr + expr_or_default set_expr_or_default param_marker geometry_function signed_literal now_or_signed_literal opt_escape sp_opt_default @@ -5439,7 +5439,7 @@ alter_commands: lex->no_write_to_binlog= $3; lex->check_opt.init(); } - opt_no_write_to_binlog opt_mi_check_type + opt_no_write_to_binlog | ANALYZE_SYM PARTITION_SYM opt_no_write_to_binlog all_or_alt_part_name_list { @@ -5448,7 +5448,6 @@ alter_commands: lex->no_write_to_binlog= $3; lex->check_opt.init(); } - opt_mi_check_type | CHECK_SYM PARTITION_SYM all_or_alt_part_name_list { LEX *lex= Lex; @@ -5931,7 +5930,7 @@ analyze: lex->no_write_to_binlog= $2; lex->check_opt.init(); } - table_list opt_mi_check_type + table_list {} ; @@ -5987,7 +5986,7 @@ optimize: lex->no_write_to_binlog= $2; lex->check_opt.init(); } - table_list opt_mi_check_type + table_list {} ; @@ -6152,6 +6151,14 @@ select_paren: my_parse_error(ER(ER_SYNTAX_ERROR)); MYSQL_YYABORT; } + if (sel->linkage == UNION_TYPE && + sel->olap != UNSPECIFIED_OLAP_TYPE && + sel->master_unit()->fake_select_lex) + { + my_error(ER_WRONG_USAGE, MYF(0), + "CUBE/ROLLUP", "ORDER BY"); + MYSQL_YYABORT; + } /* select in braces, can't contain global parameters */ if (sel->master_unit()->fake_select_lex) sel->master_unit()->global_parameters= @@ -6570,10 +6577,10 @@ bit_expr: { $$= new Item_func_plus($1,$3); } | bit_expr '-' bit_expr %prec '-' { $$= new Item_func_minus($1,$3); } - | bit_expr '+' interval_expr interval %prec '+' - { $$= new Item_date_add_interval($1,$3,$4,0); } - | bit_expr '-' interval_expr interval %prec '-' - { $$= new Item_date_add_interval($1,$3,$4,1); } + | bit_expr '+' INTERVAL_SYM expr interval %prec '+' + { $$= new Item_date_add_interval($1,$4,$5,0); } + | bit_expr '-' INTERVAL_SYM expr interval %prec '-' + { $$= new 
Item_date_add_interval($1,$4,$5,1); } | bit_expr '*' bit_expr %prec '*' { $$= new Item_func_mul($1,$3); } | bit_expr '/' bit_expr %prec '/' @@ -6623,11 +6630,6 @@ all_or_any: | ANY_SYM { $$ = 0; } ; -interval_expr: - INTERVAL_SYM expr %prec INTERVAL_SYM - { $$=$2; } - ; - simple_expr: simple_ident | function_call_keyword @@ -6723,18 +6725,9 @@ simple_expr: $$= new (YYTHD->mem_root) Item_insert_value(Lex->current_context(), $3); } - | interval_expr interval '+' expr + | INTERVAL_SYM expr interval '+' expr %prec INTERVAL_SYM /* we cannot put interval before - */ - { $$= new (YYTHD->mem_root) Item_date_add_interval($4,$1,$2,0); } - | interval_expr - { - if ($1->type() != Item::ROW_ITEM) - { - my_parse_error(ER(ER_SYNTAX_ERROR)); - MYSQL_YYABORT; - } - $$= new (YYTHD->mem_root) Item_func_interval((Item_row *)$1); - } + { $$= new (YYTHD->mem_root) Item_date_add_interval($5,$2,$3,0); } ; /* @@ -6751,6 +6744,7 @@ function_call_keyword: | CURRENT_USER optional_braces { $$= new (YYTHD->mem_root) Item_func_current_user(Lex->current_context()); + Lex->set_stmt_unsafe(); Lex->safe_to_cache_query= 0; } | DATE_SYM '(' expr ')' @@ -6761,6 +6755,23 @@ function_call_keyword: { $$= new (YYTHD->mem_root) Item_func_hour($3); } | INSERT '(' expr ',' expr ',' expr ',' expr ')' { $$= new (YYTHD->mem_root) Item_func_insert($3,$5,$7,$9); } + | INTERVAL_SYM '(' expr ',' expr ')' %prec INTERVAL_SYM + { + THD *thd= YYTHD; + List<Item> *list= new (thd->mem_root) List<Item>; + list->push_front($5); + list->push_front($3); + Item_row *item= new (thd->mem_root) Item_row(*list); + $$= new (thd->mem_root) Item_func_interval(item); + } + | INTERVAL_SYM '(' expr ',' expr ',' expr_list ')' %prec INTERVAL_SYM + { + THD *thd= YYTHD; + $7->push_front($5); + $7->push_front($3); + Item_row *item= new (thd->mem_root) Item_row(*$7); + $$= new (thd->mem_root) Item_func_interval(item); + } | LEFT '(' expr ',' expr ')' { $$= new (YYTHD->mem_root) Item_func_left($3,$5); } | MINUTE_SYM '(' expr ')' @@ -6796,6 
+6807,7 @@ function_call_keyword: | USER '(' ')' { $$= new (YYTHD->mem_root) Item_func_user(); + Lex->set_stmt_unsafe(); Lex->safe_to_cache_query=0; } | YEAR_SYM '(' expr ')' @@ -6837,10 +6849,10 @@ function_call_nonkeyword: $$= new (YYTHD->mem_root) Item_func_curtime_local($3); Lex->safe_to_cache_query=0; } - | DATE_ADD_INTERVAL '(' expr ',' interval_expr interval ')' - { $$= new (YYTHD->mem_root) Item_date_add_interval($3,$5,$6,0); } - | DATE_SUB_INTERVAL '(' expr ',' interval_expr interval ')' - { $$= new (YYTHD->mem_root) Item_date_add_interval($3,$5,$6,1); } + | DATE_ADD_INTERVAL '(' expr ',' INTERVAL_SYM expr interval ')' %prec INTERVAL_SYM + { $$= new (YYTHD->mem_root) Item_date_add_interval($3,$6,$7,0); } + | DATE_SUB_INTERVAL '(' expr ',' INTERVAL_SYM expr interval ')' %prec INTERVAL_SYM + { $$= new (YYTHD->mem_root) Item_date_add_interval($3,$6,$7,1); } | EXTRACT_SYM '(' interval FROM expr ')' { $$=new (YYTHD->mem_root) Item_extract( $3, $5); } | GET_FORMAT '(' date_time_type ',' expr ')' @@ -8035,7 +8047,8 @@ order_clause: SELECT_LEX *sel= lex->current_select; SELECT_LEX_UNIT *unit= sel-> master_unit(); if (sel->linkage != GLOBAL_OPTIONS_TYPE && - sel->olap != UNSPECIFIED_OLAP_TYPE) + sel->olap != UNSPECIFIED_OLAP_TYPE && + (sel->linkage != UNION_TYPE || sel->braces)) { my_error(ER_WRONG_USAGE, MYF(0), "CUBE/ROLLUP", "ORDER BY"); diff --git a/sql/structs.h b/sql/structs.h index 09a3c4d7285..662d0a680b8 100644 --- a/sql/structs.h +++ b/sql/structs.h @@ -71,7 +71,7 @@ typedef struct st_key_part_info { /* Info about a key part */ typedef struct st_key { uint key_length; /* Tot length of key */ - uint flags; /* dupp key and pack flags */ + ulong flags; /* dupp key and pack flags */ uint key_parts; /* How many key_parts */ uint extra_length; uint usable_key_parts; /* Should normally be = key_parts */ diff --git a/sql/table.cc b/sql/table.cc index c30407920f4..0f4c7cb7ba1 100644 --- a/sql/table.cc +++ b/sql/table.cc @@ -329,6 +329,7 @@ TABLE_SHARE 
*alloc_table_share(TABLE_LIST *table_list, char *key, SYNOPSIS init_tmp_table_share() + thd thread handle share Share to fill key Table_cache_key, as generated from create_table_def_key. must start with db name. @@ -346,7 +347,7 @@ TABLE_SHARE *alloc_table_share(TABLE_LIST *table_list, char *key, use key_length= 0 as neither table_cache_key or key_length will be used). */ -void init_tmp_table_share(TABLE_SHARE *share, const char *key, +void init_tmp_table_share(THD *thd, TABLE_SHARE *share, const char *key, uint key_length, const char *table_name, const char *path) { @@ -373,9 +374,14 @@ void init_tmp_table_share(TABLE_SHARE *share, const char *key, anyway to be able to catch errors. */ share->table_map_version= ~(ulonglong)0; - share->table_map_id= ~0UL; share->cached_row_logging_check= -1; + /* + table_map_id is also used for MERGE tables to suppress repeated + compatibility checks. + */ + share->table_map_id= (ulong) thd->query_id; + DBUG_VOID_RETURN; } @@ -4483,6 +4489,25 @@ void st_table::mark_columns_needed_for_insert() mark_auto_increment_column(); } + +/** + @brief Check if this is part of a MERGE table with attached children. + + @return status + @retval TRUE children are attached + @retval FALSE no MERGE part or children not attached + + @detail + A MERGE table consists of a parent TABLE and zero or more child + TABLEs. Each of these TABLEs is called a part of a MERGE table. +*/ + +bool st_table::is_children_attached(void) +{ + return((child_l && children_attached) || + (parent && parent->children_attached)); +} + /* Cleanup this table for re-execution. 
diff --git a/sql/table.h b/sql/table.h index 440f3a3d5cf..1e80afdf421 100644 --- a/sql/table.h +++ b/sql/table.h @@ -431,6 +431,12 @@ typedef struct st_table_share { return (table_category == TABLE_CATEGORY_PERFORMANCE); } + + inline ulong get_table_def_version() + { + return table_map_id; + } + } TABLE_SHARE; @@ -455,6 +461,11 @@ struct st_table { #endif struct st_table *next, *prev; + /* For the below MERGE related members see top comment in ha_myisammrg.cc */ + struct st_table *parent; /* Set in MERGE child. Ptr to parent */ + TABLE_LIST *child_l; /* Set in MERGE parent. List of children */ + TABLE_LIST **child_last_l; /* Set in MERGE parent. End of list */ + THD *in_use; /* Which thread uses this */ Field **field; /* Pointer to fields */ @@ -622,6 +633,8 @@ struct st_table { my_bool insert_or_update; /* Can be used by the handler */ my_bool alias_name_used; /* true if table_name is alias */ my_bool get_fields_in_item_tree; /* Signal to fix_field */ + /* If MERGE children attached to parent. See top comment in ha_myisammrg.cc */ + my_bool children_attached; REGINFO reginfo; /* field connections */ MEM_ROOT mem_root; @@ -673,6 +686,7 @@ struct st_table { */ inline bool needs_reopen_or_name_lock() { return s->version != refresh_version; } + bool is_children_attached(void); }; enum enum_schema_table_state @@ -996,6 +1010,8 @@ struct TABLE_LIST (non-zero only for merged underlying tables of a view). */ TABLE_LIST *referencing_view; + /* Ptr to parent MERGE table list item. See top comment in ha_myisammrg.cc */ + TABLE_LIST *parent_l; /* Security context (non-zero only for tables which belong to view with SQL SECURITY DEFINER) @@ -1177,6 +1193,20 @@ struct TABLE_LIST */ bool process_index_hints(TABLE *table); + /* Access MERGE child def version. 
See top comment in ha_myisammrg.cc */ + inline ulong get_child_def_version() + { + return child_def_version; + } + inline void set_child_def_version(ulong version) + { + child_def_version= version; + } + inline void init_child_def_version() + { + child_def_version= ~0UL; + } + private: bool prep_check_option(THD *thd, uint8 check_opt_type); bool prep_where(THD *thd, Item **conds, bool no_where_clause); @@ -1184,6 +1214,9 @@ private: Cleanup for re-execution in a prepared statement or a stored procedure. */ + + /* Remembered MERGE child def version. See top comment in ha_myisammrg.cc */ + ulong child_def_version; }; class Item; diff --git a/sql/unireg.cc b/sql/unireg.cc index f2238d69973..aab3f5606a8 100644 --- a/sql/unireg.cc +++ b/sql/unireg.cc @@ -227,6 +227,14 @@ bool mysql_create_frm(THD *thd, const char *file_name, strmake((char*) forminfo+47, create_info->comment.str ? create_info->comment.str : "", create_info->comment.length); forminfo[46]=(uchar) create_info->comment.length; +#ifdef EXTRA_DEBUG + /* + EXTRA_DEBUG causes strmake() to initialize its buffer behind the + payload with a magic value to detect wrong buffer-sizes. We + explicitly zero that segment again. + */ + memset((char*) forminfo+47 + forminfo[46], 0, 61 - forminfo[46]); +#endif #ifdef WITH_PARTITION_STORAGE_ENGINE if (part_info) { @@ -505,7 +513,7 @@ static uint pack_keys(uchar *keybuff, uint key_count, KEY *keyinfo, int2store(pos+6, key->block_size); pos+=8; key_parts+=key->key_parts; - DBUG_PRINT("loop", ("flags: %d key_parts: %d at 0x%lx", + DBUG_PRINT("loop", ("flags: %lu key_parts: %d at 0x%lx", key->flags, key->key_parts, (long) key->key_part)); for (key_part=key->key_part,key_part_end=key_part+key->key_parts ; |