summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorOleksandr Byelkin <sanja@mariadb.com>2020-06-10 21:14:13 +0300
committerMonty <monty@mariadb.org>2020-06-14 19:39:43 +0300
commitd7a9cdc627e59b3dbb91c8878ab74d62e0fdb3ce (patch)
tree2475063d16dcdf591792dcbe9791afa23dc09a9d
parentb3179b7e3294b8e6869283b69d1ab811b7a86944 (diff)
downloadmariadb-git-d7a9cdc627e59b3dbb91c8878ab74d62e0fdb3ce.tar.gz
Fixed hang in Aria page cache with concurrent SELECT
MDEV-20302 Server hangs upon concurrent SELECT from partitioned S3
-rw-r--r--mysql-test/suite/s3/alter2.result27
-rw-r--r--mysql-test/suite/s3/alter2.test33
-rw-r--r--storage/maria/ma_pagecache.c124
3 files changed, 150 insertions, 34 deletions
diff --git a/mysql-test/suite/s3/alter2.result b/mysql-test/suite/s3/alter2.result
index d2849905c67..b15d2b1eb96 100644
--- a/mysql-test/suite/s3/alter2.result
+++ b/mysql-test/suite/s3/alter2.result
@@ -20,3 +20,30 @@ connection con1;
disconnect con1;
connection default;
DROP TABLE t1;
+#
+# MDEV-20302 Server hangs upon concurrent SELECT from partitioned S3
+# table
+#
+CREATE TABLE t1 (
+pk INT AUTO_INCREMENT,
+c CHAR(12),
+PRIMARY KEY(pk),
+KEY(c)
+) ENGINE=Aria
+PARTITION BY KEY(pk) PARTITIONS 2;
+CREATE VIEW v1 AS SELECT * FROM t1;
+INSERT INTO t1 VALUES (NULL,'ill'),(NULL,'loop');
+ALTER TABLE t1 ENGINE=S3;
+connect con1,localhost,root,,test;
+SELECT * FROM t1 WHERE c BETWEEN 'bar' AND 'foo';
+connection default;
+SELECT pk FROM v1;
+pk
+1
+2
+connection con1;
+pk c
+disconnect con1;
+connection default;
+DROP VIEW v1;
+DROP TABLE t1;
diff --git a/mysql-test/suite/s3/alter2.test b/mysql-test/suite/s3/alter2.test
index de2bc001298..0be82f5a7ff 100644
--- a/mysql-test/suite/s3/alter2.test
+++ b/mysql-test/suite/s3/alter2.test
@@ -1,4 +1,5 @@
--source include/have_s3.inc
+--source include/have_partition.inc
--source create_database.inc
--echo #
@@ -26,6 +27,38 @@ SELECT * FROM t1;
--connection default
DROP TABLE t1;
+--echo #
+--echo # MDEV-20302 Server hangs upon concurrent SELECT from partitioned S3
+--echo # table
+--echo #
+
+CREATE TABLE t1 (
+ pk INT AUTO_INCREMENT,
+ c CHAR(12),
+ PRIMARY KEY(pk),
+ KEY(c)
+) ENGINE=Aria
+ PARTITION BY KEY(pk) PARTITIONS 2;
+
+CREATE VIEW v1 AS SELECT * FROM t1;
+INSERT INTO t1 VALUES (NULL,'ill'),(NULL,'loop');
+ALTER TABLE t1 ENGINE=S3;
+
+--connect (con1,localhost,root,,test)
+--send
+ SELECT * FROM t1 WHERE c BETWEEN 'bar' AND 'foo';
+
+--connection default
+SELECT pk FROM v1;
+
+--connection con1
+--reap
+
+--disconnect con1
+--connection default
+DROP VIEW v1;
+DROP TABLE t1;
+
#
# clean up
#
diff --git a/storage/maria/ma_pagecache.c b/storage/maria/ma_pagecache.c
index b3bbee4cc22..a1796f52819 100644
--- a/storage/maria/ma_pagecache.c
+++ b/storage/maria/ma_pagecache.c
@@ -130,7 +130,7 @@ my_bool my_disable_flush_pagecache_blocks= 0;
#define COND_FOR_REQUESTED 0 /* queue of thread waiting for read operation */
#define COND_FOR_SAVED 1 /* queue of thread waiting for flush */
#define COND_FOR_WRLOCK 2 /* queue of write lock */
-#define COND_FOR_BIG_BLOCK 3 /* queue of waiting fo big block read */
+#define COND_FOR_BIG_BLOCK 3 /* queue of waiting for big block read */
#define COND_SIZE 4 /* number of COND_* queues */
typedef mysql_cond_t KEYCACHE_CONDVAR;
@@ -178,7 +178,9 @@ struct st_pagecache_hash_link
#define PCBLOCK_CHANGED 32 /* block buffer contains a dirty page */
#define PCBLOCK_DIRECT_W 64 /* possible direct write to the block */
#define PCBLOCK_DEL_WRITE 128 /* should be written on delete */
-#define PCBLOCK_BIG_READ 256 /* the first block of the big read in progress */
+#define PCBLOCK_BIG_READ 256 /* the first block of a big read in progress,
+                                or a non-first block that another thread is
+                                waiting to have read by the big read */
/* page status, returned by find_block */
#define PAGE_READ 0
@@ -2770,7 +2772,7 @@ retry:
*/
#ifdef WITH_S3_STORAGE_ENGINE
-static my_bool read_big_block(PAGECACHE *pagecache,
+static void read_big_block(PAGECACHE *pagecache,
PAGECACHE_BLOCK_LINK *block)
{
int page_st;
@@ -2809,25 +2811,26 @@ static my_bool read_big_block(PAGECACHE *pagecache,
if (block_to_read->status & PCBLOCK_ERROR)
{
/* We get first block with an error so all operation failed */
- block->status|= PCBLOCK_ERROR;
- block->error= block_to_read->error;
- DBUG_RETURN(FALSE); // no retry
+ goto error;
}
- // only primary request here, PAGE_WAIT_TO_BE_READ is impossible
- DBUG_ASSERT(page_st != PAGE_WAIT_TO_BE_READ);
if (block_to_read->status & PCBLOCK_BIG_READ)
{
+ /*
+ Another thread is reading the big block, so we will wait until it has
+ read our block for us
+ */
struct st_my_thread_var *thread;
+ DBUG_ASSERT(page_st == PAGE_WAIT_TO_BE_READ);
DBUG_ASSERT(page_st != PAGE_TO_BE_READ);
+ block->status|= PCBLOCK_BIG_READ; // will be read by other thread
/*
Block read failed because somebody else is reading the first block
(and all other blocks part of this one).
Wait until block is available.
*/
- unreg_request(pagecache, block, 1);
thread= my_thread_var;
/* Put the request into a queue and wait until it can be processed */
- wqueue_add_to_queue(&block->wqueue[COND_FOR_BIG_BLOCK], thread);
+ wqueue_add_to_queue(&block_to_read->wqueue[COND_FOR_BIG_BLOCK], thread);
do
{
DBUG_PRINT("wait",
@@ -2837,7 +2840,21 @@ static my_bool read_big_block(PAGECACHE *pagecache,
&pagecache->cache_lock);
}
while (thread->next);
- DBUG_RETURN(TRUE);
+ // page should be read by the other thread
+ DBUG_ASSERT(block->status & PCBLOCK_READ ||
+ block->status & PCBLOCK_ERROR);
+ DBUG_ASSERT(block->status & PCBLOCK_BIG_READ);
+ block->status&= ~PCBLOCK_BIG_READ;
+ // all is read => let's finish nicely
+ DBUG_ASSERT(block_to_read != block);
+ remove_reader(block_to_read);
+ unreg_request(pagecache, block_to_read, 1);
+ DBUG_VOID_RETURN;
+ }
+ else
+ {
+ // only primary request here, PAGE_WAIT_TO_BE_READ is impossible
+ DBUG_ASSERT(page_st != PAGE_WAIT_TO_BE_READ);
}
}
else
@@ -2863,18 +2880,9 @@ static my_bool read_big_block(PAGECACHE *pagecache,
{
pagecache_pthread_mutex_lock(&pagecache->cache_lock);
block_to_read->status|= PCBLOCK_ERROR;
- block->status|= PCBLOCK_ERROR;
- block_to_read->error= block->error= (int16) my_errno;
+ block_to_read->error= (int16) my_errno;
pagecache->big_block_free(&data);
- if (block_to_read != block)
- {
- remove_reader(block_to_read);
- unreg_request(pagecache, block_to_read, 1);
- }
- /* Signal that all pending requests for this page now can be processed */
- if (block->wqueue[COND_FOR_REQUESTED].last_thread)
- wqueue_release_queue(&block->wqueue[COND_FOR_REQUESTED]);
- DBUG_RETURN(FALSE); // no retry
+ goto error;
}
/*
@@ -2914,12 +2922,18 @@ static my_bool read_big_block(PAGECACHE *pagecache,
TRUE /*register*/, TRUE /*fast*/, &page_st);
if (!bl)
{
- // we run out of easy avaliable pages in the cache
- break;
+ /*
+ We cannot get this page easily.
+ Maybe we will be lucky with other pages;
+ among the other pages there may be one awaited by another thread
+ */
+ continue;
}
DBUG_ASSERT(bl == bl->hash_link->block);
if ((bl->status & PCBLOCK_ERROR) == 0 &&
- page_st == PAGE_TO_BE_READ)
+ (page_st == PAGE_TO_BE_READ || // page should be read
+ (page_st == PAGE_WAIT_TO_BE_READ &&
+ (bl->status & PCBLOCK_BIG_READ)))) // or a page awaited by another thread
{
memcpy(bl->buffer, data.str + offset, pagecache->block_size);
bl->status|= PCBLOCK_READ;
@@ -2940,6 +2954,8 @@ static my_bool read_big_block(PAGECACHE *pagecache,
pagecache->big_block_free(&data);
block_to_read->status&= ~PCBLOCK_BIG_READ;
+
+end:
if (block_to_read != block)
{
remove_reader(block_to_read);
@@ -2947,10 +2963,56 @@ static my_bool read_big_block(PAGECACHE *pagecache,
}
if (block->wqueue[COND_FOR_BIG_BLOCK].last_thread)
wqueue_release_queue(&block->wqueue[COND_FOR_BIG_BLOCK]);
+ /* Signal that all pending requests for this page now can be processed */
if (block->wqueue[COND_FOR_REQUESTED].last_thread)
wqueue_release_queue(&block->wqueue[COND_FOR_REQUESTED]);
+ DBUG_VOID_RETURN;
- DBUG_RETURN(FALSE);
+error:
+ /*
+ Read failed. Notify all readers waiting for a block covered by the
+ big block that the read failed
+ */
+ for (offset= pagecache->block_size, page= page_to_read + 1;
+ offset < data.length;
+ offset+= pagecache->block_size, page++)
+ {
+ DBUG_ASSERT(offset + pagecache->block_size <= data.length);
+ if (page == our_page)
+ {
+ DBUG_ASSERT(!(block->status & PCBLOCK_READ));
+ block->status|= PCBLOCK_ERROR;
+ block->error= (int16) my_errno;
+ }
+ else
+ {
+ PAGECACHE_BLOCK_LINK *bl;
+ bl= find_block(pagecache, &block->hash_link->file, page, 1,
+ FALSE, TRUE /* copy under protection (?)*/,
+ TRUE /*register*/, TRUE /*fast*/, &page_st);
+ if (!bl)
+ {
+ /*
+ We cannot get this page easily.
+ Maybe we will be lucky with other pages;
+ among the other pages there may be one awaited by another thread
+ */
+ continue;
+ }
+ DBUG_ASSERT(bl == bl->hash_link->block);
+ if ((bl->status & PCBLOCK_ERROR) == 0 &&
+ (page_st == PAGE_TO_BE_READ || // page should be read
+ (page_st == PAGE_WAIT_TO_BE_READ &&
+ (bl->status & PCBLOCK_BIG_READ)))) // or a page awaited by another thread
+ {
+ bl->status|= PCBLOCK_ERROR;
+ bl->error= (int16) my_errno;
+ }
+ remove_reader(bl);
+ unreg_request(pagecache, bl, 1);
+ }
+ }
+ goto end;
}
#endif /* WITH_S3_STORAGE_ENGINE */
@@ -3706,14 +3768,8 @@ restart:
/* It is big read and this thread should read */
DBUG_ASSERT(page_st == PAGE_TO_BE_READ);
- if (read_big_block(pagecache, block))
- {
- /* block is unregistered in read_big_block */
- pagecache_pthread_mutex_unlock(&pagecache->cache_lock);
- dec_counter_for_resize_op(pagecache);
- DBUG_PRINT("restart", ("big block fail, restarting..."));
- goto restart;
- }
+ read_big_block(pagecache, block);
+
if (!((new_pin == PAGECACHE_PIN_LEFT_UNPINNED) ||
(new_pin == PAGECACHE_PIN)))
{