diff options
author | unknown <jonas@perch.ndb.mysql.com> | 2006-01-11 11:35:25 +0100 |
---|---|---|
committer | unknown <jonas@perch.ndb.mysql.com> | 2006-01-11 11:35:25 +0100 |
commit | 641ce5e97e64fc89e497ee903bca03bef9476665 (patch) | |
tree | 42bd34e581072a3b8da2e30ed6fdbd85b4079395 | |
parent | 2c0f53d69ceba4f3db98afba47bfb73b3b83525d (diff) | |
download | mariadb-git-641ce5e97e64fc89e497ee903bca03bef9476665.tar.gz |
wl2723 - ndb opt. nr
mysql-test/t/rpl_multi_engine.test:
merge fix
libmysqld/sql_tablespace.cc:
New BitKeeper file ``libmysqld/sql_tablespace.cc''
mysql-test/r/ndb_basic_disk.result:
New BitKeeper file ``mysql-test/r/ndb_basic_disk.result''
mysql-test/t/ndb_basic_disk.test:
New BitKeeper file ``mysql-test/t/ndb_basic_disk.test''
sql/sql_tablespace.cc:
New BitKeeper file ``sql/sql_tablespace.cc''
storage/ndb/src/kernel/blocks/OptNR.txt:
New BitKeeper file ``storage/ndb/src/kernel/blocks/OptNR.txt''
storage/ndb/src/kernel/vm/mem.txt:
New BitKeeper file ``storage/ndb/src/kernel/vm/mem.txt''
storage/ndb/src/kernel/vm/ndbd_malloc_impl.cpp:
New BitKeeper file ``storage/ndb/src/kernel/vm/ndbd_malloc_impl.cpp''
storage/ndb/src/kernel/vm/ndbd_malloc_impl.hpp:
New BitKeeper file ``storage/ndb/src/kernel/vm/ndbd_malloc_impl.hpp''
storage/ndb/tools/ndb_error_reporter:
New BitKeeper file ``storage/ndb/tools/ndb_error_reporter''
138 files changed, 9292 insertions, 3267 deletions
diff --git a/libmysqld/Makefile.am b/libmysqld/Makefile.am index 28db9d11558..cd2dd409e74 100644 --- a/libmysqld/Makefile.am +++ b/libmysqld/Makefile.am @@ -65,7 +65,8 @@ sqlsources = derror.cc field.cc field_conv.cc strfunc.cc filesort.cc \ sp_head.cc sp_pcontext.cc sp.cc sp_cache.cc sp_rcontext.cc \ parse_file.cc sql_view.cc sql_trigger.cc my_decimal.cc \ item_xmlfunc.cc \ - rpl_filter.cc sql_partition.cc handlerton.cc sql_plugin.cc + rpl_filter.cc sql_partition.cc handlerton.cc sql_plugin.cc \ + sql_tablespace.cc libmysqld_int_a_SOURCES= $(libmysqld_sources) $(libmysqlsources) $(sqlsources) EXTRA_libmysqld_a_SOURCES = ha_innodb.cc ha_berkeley.cc ha_archive.cc \ diff --git a/mysql-test/r/ndb_basic_disk.result b/mysql-test/r/ndb_basic_disk.result new file mode 100644 index 00000000000..2a433ad224c --- /dev/null +++ b/mysql-test/r/ndb_basic_disk.result @@ -0,0 +1,397 @@ +DROP TABLE IF EXISTS t1; +CREATE LOGFILE GROUP lg1 +ADD UNDOFILE 'undofile.dat' +INITIAL_SIZE 16M +UNDO_BUFFER_SIZE = 1M +ENGINE=NDB; +alter logfile group lg1 +add undofile 'undofile02.dat' +initial_size 4M engine=ndb; +CREATE TABLESPACE ts1 +ADD DATAFILE 'datafile.dat' +USE LOGFILE GROUP lg1 +INITIAL_SIZE 12M +ENGINE NDB; +alter tablespace ts1 +add datafile 'datafile02.dat' +initial_size 4M engine=ndb; +CREATE TABLE t1 +(pk1 int not null primary key, b int not null, c int not null) +tablespace ts1 storage disk +engine ndb; +INSERT INTO t1 VALUES (0, 0, 0); +SELECT * FROM t1; +pk1 b c +0 0 0 +INSERT INTO t1 VALUES +(1,1,1),(2,2,2),(3,3,3),(4,4,4),(5,5,5), +(6,6,6),(7,7,7),(8,8,8),(9,9,9),(10,10,10), +(11,11,11),(12,12,12),(13,13,13),(14,14,14),(15,15,15), +(16,16,16),(17,17,17),(18,18,18),(19,19,19),(20,20,20), +(21,21,21),(22,22,22),(23,23,23),(24,24,24),(25,25,25), +(26,26,26),(27,27,27),(28,28,28),(29,29,29),(30,30,30), +(31,31,31),(32,32,32),(33,33,33),(34,34,34),(35,35,35), +(36,36,36),(37,37,37),(38,38,38),(39,39,39),(40,40,40), +(41,41,41),(42,42,42),(43,43,43),(44,44,44),(45,45,45), 
+(46,46,46),(47,47,47),(48,48,48),(49,49,49),(50,50,50), +(51,51,51),(52,52,52),(53,53,53),(54,54,54),(55,55,55), +(56,56,56),(57,57,57),(58,58,58),(59,59,59),(60,60,60), +(61,61,61),(62,62,62),(63,63,63),(64,64,64),(65,65,65), +(66,66,66),(67,67,67),(68,68,68),(69,69,69),(70,70,70), +(71,71,71),(72,72,72),(73,73,73),(74,74,74),(75,75,75), +(76,76,76),(77,77,77),(78,78,78),(79,79,79),(80,80,80), +(81,81,81),(82,82,82),(83,83,83),(84,84,84),(85,85,85), +(86,86,86),(87,87,87),(88,88,88),(89,89,89),(90,90,90), +(91,91,91),(92,92,92),(93,93,93),(94,94,94),(95,95,95), +(96,96,96),(97,97,97),(98,98,98),(99,99,99),(100,100,100), +(101,101,101),(102,102,102),(103,103,103),(104,104,104),(105,105,105), +(106,106,106),(107,107,107),(108,108,108),(109,109,109),(110,110,110), +(111,111,111),(112,112,112),(113,113,113),(114,114,114),(115,115,115), +(116,116,116),(117,117,117),(118,118,118),(119,119,119),(120,120,120), +(121,121,121),(122,122,122),(123,123,123),(124,124,124),(125,125,125), +(126,126,126),(127,127,127),(128,128,128),(129,129,129),(130,130,130), +(131,131,131),(132,132,132),(133,133,133),(134,134,134),(135,135,135), +(136,136,136),(137,137,137),(138,138,138),(139,139,139),(140,140,140), +(141,141,141),(142,142,142),(143,143,143),(144,144,144),(145,145,145), +(146,146,146),(147,147,147),(148,148,148),(149,149,149),(150,150,150), +(151,151,151),(152,152,152),(153,153,153),(154,154,154),(155,155,155), +(156,156,156),(157,157,157),(158,158,158),(159,159,159),(160,160,160), +(161,161,161),(162,162,162),(163,163,163),(164,164,164),(165,165,165), +(166,166,166),(167,167,167),(168,168,168),(169,169,169),(170,170,170), +(171,171,171),(172,172,172),(173,173,173),(174,174,174),(175,175,175), +(176,176,176),(177,177,177),(178,178,178),(179,179,179),(180,180,180), +(181,181,181),(182,182,182),(183,183,183),(184,184,184),(185,185,185), +(186,186,186),(187,187,187),(188,188,188),(189,189,189),(190,190,190), +(191,191,191),(192,192,192),(193,193,193),(194,194,194),(195,195,195), 
+(196,196,196),(197,197,197),(198,198,198),(199,199,199),(200,200,200), +(201,201,201),(202,202,202),(203,203,203),(204,204,204),(205,205,205), +(206,206,206),(207,207,207),(208,208,208),(209,209,209),(210,210,210), +(211,211,211),(212,212,212),(213,213,213),(214,214,214),(215,215,215), +(216,216,216),(217,217,217),(218,218,218),(219,219,219),(220,220,220), +(221,221,221),(222,222,222),(223,223,223),(224,224,224),(225,225,225), +(226,226,226),(227,227,227),(228,228,228),(229,229,229),(230,230,230), +(231,231,231),(232,232,232),(233,233,233),(234,234,234),(235,235,235), +(236,236,236),(237,237,237),(238,238,238),(239,239,239),(240,240,240), +(241,241,241),(242,242,242),(243,243,243),(244,244,244),(245,245,245), +(246,246,246),(247,247,247),(248,248,248),(249,249,249),(250,250,250), +(251,251,251),(252,252,252),(253,253,253),(254,254,254),(255,255,255), +(256,256,256),(257,257,257),(258,258,258),(259,259,259),(260,260,260), +(261,261,261),(262,262,262),(263,263,263),(264,264,264),(265,265,265), +(266,266,266),(267,267,267),(268,268,268),(269,269,269),(270,270,270), +(271,271,271),(272,272,272),(273,273,273),(274,274,274),(275,275,275), +(276,276,276),(277,277,277),(278,278,278),(279,279,279),(280,280,280), +(281,281,281),(282,282,282),(283,283,283),(284,284,284),(285,285,285), +(286,286,286),(287,287,287),(288,288,288),(289,289,289),(290,290,290), +(291,291,291),(292,292,292),(293,293,293),(294,294,294),(295,295,295), +(296,296,296),(297,297,297),(298,298,298),(299,299,299),(300,300,300), +(301,301,301),(302,302,302),(303,303,303),(304,304,304),(305,305,305), +(306,306,306),(307,307,307),(308,308,308),(309,309,309),(310,310,310), +(311,311,311),(312,312,312),(313,313,313),(314,314,314),(315,315,315), +(316,316,316),(317,317,317),(318,318,318),(319,319,319),(320,320,320), +(321,321,321),(322,322,322),(323,323,323),(324,324,324),(325,325,325), +(326,326,326),(327,327,327),(328,328,328),(329,329,329),(330,330,330), 
+(331,331,331),(332,332,332),(333,333,333),(334,334,334),(335,335,335), +(336,336,336),(337,337,337),(338,338,338),(339,339,339),(340,340,340), +(341,341,341),(342,342,342),(343,343,343),(344,344,344),(345,345,345), +(346,346,346),(347,347,347),(348,348,348),(349,349,349),(350,350,350), +(351,351,351),(352,352,352),(353,353,353),(354,354,354),(355,355,355), +(356,356,356),(357,357,357),(358,358,358),(359,359,359),(360,360,360), +(361,361,361),(362,362,362),(363,363,363),(364,364,364),(365,365,365), +(366,366,366),(367,367,367),(368,368,368),(369,369,369),(370,370,370), +(371,371,371),(372,372,372),(373,373,373),(374,374,374),(375,375,375), +(376,376,376),(377,377,377),(378,378,378),(379,379,379),(380,380,380), +(381,381,381),(382,382,382),(383,383,383),(384,384,384),(385,385,385), +(386,386,386),(387,387,387),(388,388,388),(389,389,389),(390,390,390), +(391,391,391),(392,392,392),(393,393,393),(394,394,394),(395,395,395), +(396,396,396),(397,397,397),(398,398,398),(399,399,399),(400,400,400), +(401,401,401),(402,402,402),(403,403,403),(404,404,404),(405,405,405), +(406,406,406),(407,407,407),(408,408,408),(409,409,409),(410,410,410), +(411,411,411),(412,412,412),(413,413,413),(414,414,414),(415,415,415), +(416,416,416),(417,417,417),(418,418,418),(419,419,419),(420,420,420), +(421,421,421),(422,422,422),(423,423,423),(424,424,424),(425,425,425), +(426,426,426),(427,427,427),(428,428,428),(429,429,429),(430,430,430), +(431,431,431),(432,432,432),(433,433,433),(434,434,434),(435,435,435), +(436,436,436),(437,437,437),(438,438,438),(439,439,439),(440,440,440), +(441,441,441),(442,442,442),(443,443,443),(444,444,444),(445,445,445), +(446,446,446),(447,447,447),(448,448,448),(449,449,449),(450,450,450), +(451,451,451),(452,452,452),(453,453,453),(454,454,454),(455,455,455), +(456,456,456),(457,457,457),(458,458,458),(459,459,459),(460,460,460), +(461,461,461),(462,462,462),(463,463,463),(464,464,464),(465,465,465), 
+(466,466,466),(467,467,467),(468,468,468),(469,469,469),(470,470,470), +(471,471,471),(472,472,472),(473,473,473),(474,474,474),(475,475,475), +(476,476,476),(477,477,477),(478,478,478),(479,479,479),(480,480,480), +(481,481,481),(482,482,482),(483,483,483),(484,484,484),(485,485,485), +(486,486,486),(487,487,487),(488,488,488),(489,489,489),(490,490,490), +(491,491,491),(492,492,492),(493,493,493),(494,494,494),(495,495,495), +(496,496,496),(497,497,497),(498,498,498),(499,499,499),(500, 500, 500); +SELECT COUNT(*) FROM t1; +COUNT(*) +501 +CREATE LOGFILE GROUP lg2 +ADD UNDOFILE 'x.dat' +INITIAL_SIZE 10y +engine = ndb; +ERROR HY000: A size parameter was incorrectly specified, either number or on the form 10M +CREATE LOGFILE GROUP lg2 +ADD UNDOFILE 'x.dat' +INITIAL_SIZE 10MB +engine=ndb; +ERROR HY000: A size parameter was incorrectly specified, either number or on the form 10M +CREATE LOGFILE GROUP lg2 +ADD UNDOFILE 'x.dat' +INITIAL_SIZE 10 MB +engine=ndb; +ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near 'MB +engine=ndb' at line 3 +CREATE LOGFILE GROUP lg2 +ADD UNDOFILE 'x.dat' +INITIAL_SIZE 10 M +engine=ndb; +ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near 'M +engine=ndb' at line 3 +CREATE LOGFILE GROUP lg2 +ADD UNDOFILE 'x.dat' +INITIAL_SIZE 1000000000000K +engine=ndb; +ERROR HY000: The size number was correct but we don't allow the digit part to be more than 2 billion +DROP TABLE t1; +create table t1 (a int primary key, b char(4) not null, c char(4) not null, key(b)) tablespace ts1 storage disk engine ndb; +insert into t1 values (1,'1','1'), (2,'2','2'), (3,'3','3'); +begin; +update t1 set b = '2' where a = 1; +select b from t1 where a = 1; +b +2 +select * from t1 where a = 1; +a b c +1 2 1 +update t1 set c = '2' where a = 1; +select b from t1 where a = 1; +b +2 
+select * from t1 where a = 1; +a b c +1 2 2 +update t1 set b = '3' where a = 1; +select b from t1 where a = 1; +b +3 +select * from t1 where a = 1; +a b c +1 3 2 +commit; +select * from t1 order by 1; +a b c +1 3 2 +2 2 2 +3 3 3 +begin; +update t1 set c = '3' where a = 1; +select b from t1 where a = 1; +b +3 +select * from t1 where a = 1; +a b c +1 3 3 +update t1 set b = '4' where a = 1; +select b from t1 where a = 1; +b +4 +select * from t1 where a = 1; +a b c +1 4 3 +update t1 set c = '4' where a = 1; +select b from t1 where a = 1; +b +4 +select * from t1 where a = 1; +a b c +1 4 4 +commit; +select * from t1 order by 1; +a b c +1 4 4 +2 2 2 +3 3 3 +update t1 set b = '5' where a = 1; +select * from t1 order by 1; +a b c +1 5 4 +2 2 2 +3 3 3 +update t1 set b = '6' where b = '5'; +select * from t1 order by 1; +a b c +1 6 4 +2 2 2 +3 3 3 +update t1 set b = '7' where c = '4'; +select * from t1 order by 1; +a b c +1 7 4 +2 2 2 +3 3 3 +update t1 set c = '5' where a = 1; +select * from t1 order by 1; +a b c +1 7 5 +2 2 2 +3 3 3 +update t1 set c = '6' where b = '7'; +select * from t1 order by 1; +a b c +1 7 6 +2 2 2 +3 3 3 +update t1 set c = '7' where c = '6'; +select * from t1 order by 1; +a b c +1 7 7 +2 2 2 +3 3 3 +drop table t1; +create table t1 (a int primary key, b varchar(4) not null, c char(4) not null, key(b)) tablespace ts1 storage disk engine ndb; +insert into t1 values (1,'1','1'), (2,'2','2'), (3,'3','3'); +begin; +update t1 set b = '2' where a = 1; +select b from t1 where a = 1; +b +2 +select * from t1 where a = 1; +a b c +1 2 1 +update t1 set c = '2' where a = 1; +select b from t1 where a = 1; +b +2 +select * from t1 where a = 1; +a b c +1 2 2 +update t1 set b = '3' where a = 1; +select b from t1 where a = 1; +b +3 +select * from t1 where a = 1; +a b c +1 3 2 +commit; +select * from t1 order by 1; +a b c +1 3 2 +2 2 2 +3 3 3 +begin; +update t1 set c = '3' where a = 1; +select b from t1 where a = 1; +b +3 +select * from t1 where a = 1; +a b c +1 3 3 +update 
t1 set b = '4' where a = 1; +select b from t1 where a = 1; +b +4 +select * from t1 where a = 1; +a b c +1 4 3 +update t1 set c = '4' where a = 1; +select b from t1 where a = 1; +b +4 +select * from t1 where a = 1; +a b c +1 4 4 +commit; +select * from t1 order by 1; +a b c +1 4 4 +2 2 2 +3 3 3 +update t1 set b = '5' where a = 1; +select * from t1 order by 1; +a b c +1 5 4 +2 2 2 +3 3 3 +update t1 set b = '6' where b = '5'; +select * from t1 order by 1; +a b c +1 6 4 +2 2 2 +3 3 3 +update t1 set b = '7' where c = '4'; +select * from t1 order by 1; +a b c +1 7 4 +2 2 2 +3 3 3 +update t1 set c = '5' where a = 1; +select * from t1 order by 1; +a b c +1 7 5 +2 2 2 +3 3 3 +update t1 set c = '6' where b = '7'; +select * from t1 order by 1; +a b c +1 7 6 +2 2 2 +3 3 3 +update t1 set c = '7' where c = '6'; +select * from t1 order by 1; +a b c +1 7 7 +2 2 2 +3 3 3 +drop table t1; +create table t1 ( +a int not null primary key, +b text not null +) tablespace ts1 storage disk engine=ndbcluster; +set @x0 = '01234567012345670123456701234567'; +set @x0 = concat(@x0,@x0,@x0,@x0,@x0,@x0,@x0,@x0); +set @b1 = 'b1'; +set @b1 = concat(@b1,@b1,@b1,@b1,@b1,@b1,@b1,@b1,@b1,@b1); +set @b1 = concat(@b1,@b1,@b1,@b1,@b1,@b1,@b1,@b1,@b1,@b1); +set @b1 = concat(@b1,@b1,@b1,@b1,@b1,@b1,@b1,@b1,@b1,@b1); +set @b1 = concat(@b1,@x0); +set @b2 = 'b2'; +set @b2 = concat(@b2,@b2,@b2,@b2,@b2,@b2,@b2,@b2,@b2,@b2); +set @b2 = concat(@b2,@b2,@b2,@b2,@b2,@b2,@b2,@b2,@b2,@b2); +set @b2 = concat(@b2,@b2,@b2,@b2,@b2,@b2,@b2,@b2,@b2,@b2); +set @b2 = concat(@b2,@b2,@b2,@b2,@b2,@b2,@b2,@b2,@b2,@b2); +insert into t1 values(1,@b1); +insert into t1 values(2,@b2); +select a,length(b),substr(b,1+2*900,2) from t1 where a=1; +a length(b) substr(b,1+2*900,2) +1 2256 b1 +select a,length(b),substr(b,1+2*9000,2) from t1 where a=2; +a length(b) substr(b,1+2*9000,2) +2 20000 b2 +update t1 set b=@b2 where a=1; +update t1 set b=@b1 where a=2; +select a,length(b),substr(b,1+2*9000,2) from t1 where a=1; +a length(b) 
substr(b,1+2*9000,2) +1 20000 b2 +select a,length(b),substr(b,1+2*900,2) from t1 where a=2; +a length(b) substr(b,1+2*900,2) +2 2256 b1 +update t1 set b=concat(b,b) where a=1; +update t1 set b=concat(b,b) where a=2; +select a,length(b),substr(b,1+4*9000,2) from t1 where a=1; +a length(b) substr(b,1+4*9000,2) +1 40000 b2 +select a,length(b),substr(b,1+4*900,2) from t1 where a=2; +a length(b) substr(b,1+4*900,2) +2 4512 b1 +delete from t1 where a=1; +delete from t1 where a=2; +select count(*) from t1; +count(*) +0 +drop table t1; +alter tablespace ts1 drop datafile 'datafile.dat' engine = ndb; +alter tablespace ts1 drop datafile 'datafile02.dat' engine = ndb; +drop tablespace ts1 engine = ndb; +drop logfile group lg1 engine = ndb; diff --git a/mysql-test/r/ndb_partition_key.result b/mysql-test/r/ndb_partition_key.result index c06ca5219f2..415b9d37b4d 100644 --- a/mysql-test/r/ndb_partition_key.result +++ b/mysql-test/r/ndb_partition_key.result @@ -55,6 +55,8 @@ Temporary table: no Number of attributes: 4 Number of primary keys: 3 Length of frm data: # +Row Checksum: 1 +Row GCI: 1 TableStatus: Retrieved -- Attributes -- a Int PRIMARY KEY AT=FIXED ST=MEMORY diff --git a/mysql-test/t/ndb_basic_disk.test b/mysql-test/t/ndb_basic_disk.test new file mode 100644 index 00000000000..8d7e988150a --- /dev/null +++ b/mysql-test/t/ndb_basic_disk.test @@ -0,0 +1,312 @@ +-- source include/have_ndb.inc + +--disable_warnings +DROP TABLE IF EXISTS t1; +--enable_warnings + +# +# Basic test of disk tables for NDB +# + +# +# Start by creating a logfile group +# + +CREATE LOGFILE GROUP lg1 +ADD UNDOFILE 'undofile.dat' +INITIAL_SIZE 16M +UNDO_BUFFER_SIZE = 1M +ENGINE=NDB; + +alter logfile group lg1 +add undofile 'undofile02.dat' +initial_size 4M engine=ndb; + +# +# Create a tablespace connected to the logfile group +# + +CREATE TABLESPACE ts1 +ADD DATAFILE 'datafile.dat' +USE LOGFILE GROUP lg1 +INITIAL_SIZE 12M +ENGINE NDB; + +alter tablespace ts1 +add datafile 'datafile02.dat' 
+initial_size 4M engine=ndb; + +# +# Create a table using this tablespace +# + +CREATE TABLE t1 +(pk1 int not null primary key, b int not null, c int not null) +tablespace ts1 storage disk +engine ndb; + +INSERT INTO t1 VALUES (0, 0, 0); +SELECT * FROM t1; + +INSERT INTO t1 VALUES +(1,1,1),(2,2,2),(3,3,3),(4,4,4),(5,5,5), +(6,6,6),(7,7,7),(8,8,8),(9,9,9),(10,10,10), +(11,11,11),(12,12,12),(13,13,13),(14,14,14),(15,15,15), +(16,16,16),(17,17,17),(18,18,18),(19,19,19),(20,20,20), +(21,21,21),(22,22,22),(23,23,23),(24,24,24),(25,25,25), +(26,26,26),(27,27,27),(28,28,28),(29,29,29),(30,30,30), +(31,31,31),(32,32,32),(33,33,33),(34,34,34),(35,35,35), +(36,36,36),(37,37,37),(38,38,38),(39,39,39),(40,40,40), +(41,41,41),(42,42,42),(43,43,43),(44,44,44),(45,45,45), +(46,46,46),(47,47,47),(48,48,48),(49,49,49),(50,50,50), +(51,51,51),(52,52,52),(53,53,53),(54,54,54),(55,55,55), +(56,56,56),(57,57,57),(58,58,58),(59,59,59),(60,60,60), +(61,61,61),(62,62,62),(63,63,63),(64,64,64),(65,65,65), +(66,66,66),(67,67,67),(68,68,68),(69,69,69),(70,70,70), +(71,71,71),(72,72,72),(73,73,73),(74,74,74),(75,75,75), +(76,76,76),(77,77,77),(78,78,78),(79,79,79),(80,80,80), +(81,81,81),(82,82,82),(83,83,83),(84,84,84),(85,85,85), +(86,86,86),(87,87,87),(88,88,88),(89,89,89),(90,90,90), +(91,91,91),(92,92,92),(93,93,93),(94,94,94),(95,95,95), +(96,96,96),(97,97,97),(98,98,98),(99,99,99),(100,100,100), +(101,101,101),(102,102,102),(103,103,103),(104,104,104),(105,105,105), +(106,106,106),(107,107,107),(108,108,108),(109,109,109),(110,110,110), +(111,111,111),(112,112,112),(113,113,113),(114,114,114),(115,115,115), +(116,116,116),(117,117,117),(118,118,118),(119,119,119),(120,120,120), +(121,121,121),(122,122,122),(123,123,123),(124,124,124),(125,125,125), +(126,126,126),(127,127,127),(128,128,128),(129,129,129),(130,130,130), +(131,131,131),(132,132,132),(133,133,133),(134,134,134),(135,135,135), +(136,136,136),(137,137,137),(138,138,138),(139,139,139),(140,140,140), 
+(141,141,141),(142,142,142),(143,143,143),(144,144,144),(145,145,145), +(146,146,146),(147,147,147),(148,148,148),(149,149,149),(150,150,150), +(151,151,151),(152,152,152),(153,153,153),(154,154,154),(155,155,155), +(156,156,156),(157,157,157),(158,158,158),(159,159,159),(160,160,160), +(161,161,161),(162,162,162),(163,163,163),(164,164,164),(165,165,165), +(166,166,166),(167,167,167),(168,168,168),(169,169,169),(170,170,170), +(171,171,171),(172,172,172),(173,173,173),(174,174,174),(175,175,175), +(176,176,176),(177,177,177),(178,178,178),(179,179,179),(180,180,180), +(181,181,181),(182,182,182),(183,183,183),(184,184,184),(185,185,185), +(186,186,186),(187,187,187),(188,188,188),(189,189,189),(190,190,190), +(191,191,191),(192,192,192),(193,193,193),(194,194,194),(195,195,195), +(196,196,196),(197,197,197),(198,198,198),(199,199,199),(200,200,200), +(201,201,201),(202,202,202),(203,203,203),(204,204,204),(205,205,205), +(206,206,206),(207,207,207),(208,208,208),(209,209,209),(210,210,210), +(211,211,211),(212,212,212),(213,213,213),(214,214,214),(215,215,215), +(216,216,216),(217,217,217),(218,218,218),(219,219,219),(220,220,220), +(221,221,221),(222,222,222),(223,223,223),(224,224,224),(225,225,225), +(226,226,226),(227,227,227),(228,228,228),(229,229,229),(230,230,230), +(231,231,231),(232,232,232),(233,233,233),(234,234,234),(235,235,235), +(236,236,236),(237,237,237),(238,238,238),(239,239,239),(240,240,240), +(241,241,241),(242,242,242),(243,243,243),(244,244,244),(245,245,245), +(246,246,246),(247,247,247),(248,248,248),(249,249,249),(250,250,250), +(251,251,251),(252,252,252),(253,253,253),(254,254,254),(255,255,255), +(256,256,256),(257,257,257),(258,258,258),(259,259,259),(260,260,260), +(261,261,261),(262,262,262),(263,263,263),(264,264,264),(265,265,265), +(266,266,266),(267,267,267),(268,268,268),(269,269,269),(270,270,270), +(271,271,271),(272,272,272),(273,273,273),(274,274,274),(275,275,275), 
+(276,276,276),(277,277,277),(278,278,278),(279,279,279),(280,280,280), +(281,281,281),(282,282,282),(283,283,283),(284,284,284),(285,285,285), +(286,286,286),(287,287,287),(288,288,288),(289,289,289),(290,290,290), +(291,291,291),(292,292,292),(293,293,293),(294,294,294),(295,295,295), +(296,296,296),(297,297,297),(298,298,298),(299,299,299),(300,300,300), +(301,301,301),(302,302,302),(303,303,303),(304,304,304),(305,305,305), +(306,306,306),(307,307,307),(308,308,308),(309,309,309),(310,310,310), +(311,311,311),(312,312,312),(313,313,313),(314,314,314),(315,315,315), +(316,316,316),(317,317,317),(318,318,318),(319,319,319),(320,320,320), +(321,321,321),(322,322,322),(323,323,323),(324,324,324),(325,325,325), +(326,326,326),(327,327,327),(328,328,328),(329,329,329),(330,330,330), +(331,331,331),(332,332,332),(333,333,333),(334,334,334),(335,335,335), +(336,336,336),(337,337,337),(338,338,338),(339,339,339),(340,340,340), +(341,341,341),(342,342,342),(343,343,343),(344,344,344),(345,345,345), +(346,346,346),(347,347,347),(348,348,348),(349,349,349),(350,350,350), +(351,351,351),(352,352,352),(353,353,353),(354,354,354),(355,355,355), +(356,356,356),(357,357,357),(358,358,358),(359,359,359),(360,360,360), +(361,361,361),(362,362,362),(363,363,363),(364,364,364),(365,365,365), +(366,366,366),(367,367,367),(368,368,368),(369,369,369),(370,370,370), +(371,371,371),(372,372,372),(373,373,373),(374,374,374),(375,375,375), +(376,376,376),(377,377,377),(378,378,378),(379,379,379),(380,380,380), +(381,381,381),(382,382,382),(383,383,383),(384,384,384),(385,385,385), +(386,386,386),(387,387,387),(388,388,388),(389,389,389),(390,390,390), +(391,391,391),(392,392,392),(393,393,393),(394,394,394),(395,395,395), +(396,396,396),(397,397,397),(398,398,398),(399,399,399),(400,400,400), +(401,401,401),(402,402,402),(403,403,403),(404,404,404),(405,405,405), +(406,406,406),(407,407,407),(408,408,408),(409,409,409),(410,410,410), 
+(411,411,411),(412,412,412),(413,413,413),(414,414,414),(415,415,415), +(416,416,416),(417,417,417),(418,418,418),(419,419,419),(420,420,420), +(421,421,421),(422,422,422),(423,423,423),(424,424,424),(425,425,425), +(426,426,426),(427,427,427),(428,428,428),(429,429,429),(430,430,430), +(431,431,431),(432,432,432),(433,433,433),(434,434,434),(435,435,435), +(436,436,436),(437,437,437),(438,438,438),(439,439,439),(440,440,440), +(441,441,441),(442,442,442),(443,443,443),(444,444,444),(445,445,445), +(446,446,446),(447,447,447),(448,448,448),(449,449,449),(450,450,450), +(451,451,451),(452,452,452),(453,453,453),(454,454,454),(455,455,455), +(456,456,456),(457,457,457),(458,458,458),(459,459,459),(460,460,460), +(461,461,461),(462,462,462),(463,463,463),(464,464,464),(465,465,465), +(466,466,466),(467,467,467),(468,468,468),(469,469,469),(470,470,470), +(471,471,471),(472,472,472),(473,473,473),(474,474,474),(475,475,475), +(476,476,476),(477,477,477),(478,478,478),(479,479,479),(480,480,480), +(481,481,481),(482,482,482),(483,483,483),(484,484,484),(485,485,485), +(486,486,486),(487,487,487),(488,488,488),(489,489,489),(490,490,490), +(491,491,491),(492,492,492),(493,493,493),(494,494,494),(495,495,495), +(496,496,496),(497,497,497),(498,498,498),(499,499,499),(500, 500, 500); + +SELECT COUNT(*) FROM t1; + +# +# Test error cases with size numbers +# +--error ER_WRONG_SIZE_NUMBER +CREATE LOGFILE GROUP lg2 +ADD UNDOFILE 'x.dat' +INITIAL_SIZE 10y +engine = ndb; + +--error ER_WRONG_SIZE_NUMBER +CREATE LOGFILE GROUP lg2 +ADD UNDOFILE 'x.dat' +INITIAL_SIZE 10MB +engine=ndb; + +--error 1064 +CREATE LOGFILE GROUP lg2 +ADD UNDOFILE 'x.dat' +INITIAL_SIZE 10 MB +engine=ndb; + +--error 1064 +CREATE LOGFILE GROUP lg2 +ADD UNDOFILE 'x.dat' +INITIAL_SIZE 10 M +engine=ndb; + +--error ER_SIZE_OVERFLOW_ERROR +CREATE LOGFILE GROUP lg2 +ADD UNDOFILE 'x.dat' +INITIAL_SIZE 1000000000000K +engine=ndb; + +DROP TABLE t1; + +# Test update of mm/dd part +create table t1 (a int primary key, b 
char(4) not null, c char(4) not null, key(b)) tablespace ts1 storage disk engine ndb; +insert into t1 values (1,'1','1'), (2,'2','2'), (3,'3','3'); +begin; +update t1 set b = '2' where a = 1; +select b from t1 where a = 1; +select * from t1 where a = 1; +update t1 set c = '2' where a = 1; +select b from t1 where a = 1; +select * from t1 where a = 1; +update t1 set b = '3' where a = 1; +select b from t1 where a = 1; +select * from t1 where a = 1; +commit; +select * from t1 order by 1; +begin; +update t1 set c = '3' where a = 1; +select b from t1 where a = 1; +select * from t1 where a = 1; +update t1 set b = '4' where a = 1; +select b from t1 where a = 1; +select * from t1 where a = 1; +update t1 set c = '4' where a = 1; +select b from t1 where a = 1; +select * from t1 where a = 1; +commit; +select * from t1 order by 1; +update t1 set b = '5' where a = 1; +select * from t1 order by 1; +update t1 set b = '6' where b = '5'; +select * from t1 order by 1; +update t1 set b = '7' where c = '4'; +select * from t1 order by 1; +update t1 set c = '5' where a = 1; +select * from t1 order by 1; +update t1 set c = '6' where b = '7'; +select * from t1 order by 1; +update t1 set c = '7' where c = '6'; +select * from t1 order by 1; +drop table t1; +create table t1 (a int primary key, b varchar(4) not null, c char(4) not null, key(b)) tablespace ts1 storage disk engine ndb; +insert into t1 values (1,'1','1'), (2,'2','2'), (3,'3','3'); +begin; +update t1 set b = '2' where a = 1; +select b from t1 where a = 1; +select * from t1 where a = 1; +update t1 set c = '2' where a = 1; +select b from t1 where a = 1; +select * from t1 where a = 1; +update t1 set b = '3' where a = 1; +select b from t1 where a = 1; +select * from t1 where a = 1; +commit; +select * from t1 order by 1; +begin; +update t1 set c = '3' where a = 1; +select b from t1 where a = 1; +select * from t1 where a = 1; +update t1 set b = '4' where a = 1; +select b from t1 where a = 1; +select * from t1 where a = 1; +update t1 set 
c = '4' where a = 1; +select b from t1 where a = 1; +select * from t1 where a = 1; +commit; +select * from t1 order by 1; +update t1 set b = '5' where a = 1; +select * from t1 order by 1; +update t1 set b = '6' where b = '5'; +select * from t1 order by 1; +update t1 set b = '7' where c = '4'; +select * from t1 order by 1; +update t1 set c = '5' where a = 1; +select * from t1 order by 1; +update t1 set c = '6' where b = '7'; +select * from t1 order by 1; +update t1 set c = '7' where c = '6'; +select * from t1 order by 1; +drop table t1; + +# Test for blobs... +create table t1 ( + a int not null primary key, + b text not null +) tablespace ts1 storage disk engine=ndbcluster; + +# b1 length 2000+256 (blob part aligned) +set @x0 = '01234567012345670123456701234567'; +set @x0 = concat(@x0,@x0,@x0,@x0,@x0,@x0,@x0,@x0); +set @b1 = 'b1'; +set @b1 = concat(@b1,@b1,@b1,@b1,@b1,@b1,@b1,@b1,@b1,@b1); +set @b1 = concat(@b1,@b1,@b1,@b1,@b1,@b1,@b1,@b1,@b1,@b1); +set @b1 = concat(@b1,@b1,@b1,@b1,@b1,@b1,@b1,@b1,@b1,@b1); +set @b1 = concat(@b1,@x0); +# b2 length 20000 +set @b2 = 'b2'; +set @b2 = concat(@b2,@b2,@b2,@b2,@b2,@b2,@b2,@b2,@b2,@b2); +set @b2 = concat(@b2,@b2,@b2,@b2,@b2,@b2,@b2,@b2,@b2,@b2); +set @b2 = concat(@b2,@b2,@b2,@b2,@b2,@b2,@b2,@b2,@b2,@b2); +set @b2 = concat(@b2,@b2,@b2,@b2,@b2,@b2,@b2,@b2,@b2,@b2); + +insert into t1 values(1,@b1); +insert into t1 values(2,@b2); +select a,length(b),substr(b,1+2*900,2) from t1 where a=1; +select a,length(b),substr(b,1+2*9000,2) from t1 where a=2; +update t1 set b=@b2 where a=1; +update t1 set b=@b1 where a=2; +select a,length(b),substr(b,1+2*9000,2) from t1 where a=1; +select a,length(b),substr(b,1+2*900,2) from t1 where a=2; +update t1 set b=concat(b,b) where a=1; +update t1 set b=concat(b,b) where a=2; +select a,length(b),substr(b,1+4*9000,2) from t1 where a=1; +select a,length(b),substr(b,1+4*900,2) from t1 where a=2; +delete from t1 where a=1; +delete from t1 where a=2; +select count(*) from t1; +drop table t1; + +alter 
tablespace ts1 drop datafile 'datafile.dat' engine = ndb; +alter tablespace ts1 drop datafile 'datafile02.dat' engine = ndb; +drop tablespace ts1 engine = ndb; + +drop logfile group lg1 engine = ndb; diff --git a/sql/Makefile.am b/sql/Makefile.am index e2cf53c13aa..c1cf4c73d54 100644 --- a/sql/Makefile.am +++ b/sql/Makefile.am @@ -98,7 +98,7 @@ mysqld_SOURCES = sql_lex.cc sql_handler.cc sql_partition.cc \ sp_head.cc sp_pcontext.cc sp_rcontext.cc sp.cc \ sp_cache.cc parse_file.cc sql_trigger.cc \ sql_plugin.cc sql_binlog.cc \ - handlerton.cc + handlerton.cc sql_tablespace.cc EXTRA_mysqld_SOURCES = ha_innodb.cc ha_berkeley.cc ha_archive.cc \ ha_innodb.h ha_berkeley.h ha_archive.h \ ha_blackhole.cc ha_federated.cc ha_ndbcluster.cc \ diff --git a/sql/ha_berkeley.cc b/sql/ha_berkeley.cc index 01d6ceed3f2..fb9ed2de117 100644 --- a/sql/ha_berkeley.cc +++ b/sql/ha_berkeley.cc @@ -149,6 +149,7 @@ handlerton berkeley_hton = { NULL, /* Start Consistent Snapshot */ berkeley_flush_logs, /* Flush logs */ berkeley_show_status, /* Show status */ + NULL, /* Alter Tablespace */ HTON_CLOSE_CURSORS_AT_COMMIT | HTON_FLUSH_AFTER_RENAME }; diff --git a/sql/ha_blackhole.cc b/sql/ha_blackhole.cc index 615836b9867..38e03d4d1f7 100644 --- a/sql/ha_blackhole.cc +++ b/sql/ha_blackhole.cc @@ -57,6 +57,7 @@ handlerton blackhole_hton= { NULL, /* Start Consistent Snapshot */ NULL, /* Flush logs */ NULL, /* Show status */ + NULL, /* Alter Tablespace */ HTON_CAN_RECREATE }; diff --git a/sql/ha_federated.cc b/sql/ha_federated.cc index bc087ac25e7..b218b52bfd9 100644 --- a/sql/ha_federated.cc +++ b/sql/ha_federated.cc @@ -394,6 +394,7 @@ handlerton federated_hton= { NULL, /* Start Consistent Snapshot */ NULL, /* Flush logs */ NULL, /* Show status */ + NULL, /* Alter Tablespace */ HTON_ALTER_NOT_SUPPORTED }; diff --git a/sql/ha_heap.cc b/sql/ha_heap.cc index a83a95ac863..bcb0bf07774 100644 --- a/sql/ha_heap.cc +++ b/sql/ha_heap.cc @@ -54,6 +54,7 @@ handlerton heap_hton= { NULL, /* Start Consistent 
Snapshot */ NULL, /* Flush logs */ NULL, /* Show status */ + NULL, /* Alter Tablespace */ HTON_CAN_RECREATE }; diff --git a/sql/ha_myisam.cc b/sql/ha_myisam.cc index 41000564e53..87bc2148b03 100644 --- a/sql/ha_myisam.cc +++ b/sql/ha_myisam.cc @@ -86,6 +86,7 @@ handlerton myisam_hton= { NULL, /* Start Consistent Snapshot */ NULL, /* Flush logs */ NULL, /* Show status */ + NULL, /* Alter Tablespace */ HTON_CAN_RECREATE }; diff --git a/sql/ha_myisammrg.cc b/sql/ha_myisammrg.cc index ccb3475e34f..601fe94bf11 100644 --- a/sql/ha_myisammrg.cc +++ b/sql/ha_myisammrg.cc @@ -64,6 +64,7 @@ handlerton myisammrg_hton= { NULL, /* Start Consistent Snapshot */ NULL, /* Flush logs */ NULL, /* Show status */ + NULL, /* Alter Tablespace */ HTON_CAN_RECREATE }; diff --git a/sql/ha_ndbcluster.cc b/sql/ha_ndbcluster.cc index 0cef2d1521c..7bc3af2c3aa 100644 --- a/sql/ha_ndbcluster.cc +++ b/sql/ha_ndbcluster.cc @@ -57,6 +57,7 @@ static int ndbcluster_close_connection(THD *thd); static int ndbcluster_commit(THD *thd, bool all); static int ndbcluster_rollback(THD *thd, bool all); static handler* ndbcluster_create_handler(TABLE_SHARE *table); +static int ndbcluster_alter_tablespace(THD* thd, st_alter_tablespace *info); handlerton ndbcluster_hton = { MYSQL_HANDLERTON_INTERFACE_VERSION, @@ -86,6 +87,7 @@ handlerton ndbcluster_hton = { NULL, /* Start Consistent Snapshot */ NULL, /* Flush logs */ ndbcluster_show_status, /* Show status */ + ndbcluster_alter_tablespace, HTON_NO_FLAGS }; @@ -4019,13 +4021,8 @@ int ha_ndbcluster::create(const char *name, field->pack_length())); if ((my_errno= create_ndb_column(col, field, info))) DBUG_RETURN(my_errno); - - if ( -#ifdef NDB_DISKDATA - info->store_on_disk || -#else - getenv("NDB_DEFAULT_DISK")) -#endif + + if (info->store_on_disk || getenv("NDB_DEFAULT_DISK")) col.setStorageType(NdbDictionary::Column::StorageTypeDisk); else col.setStorageType(NdbDictionary::Column::StorageTypeMemory); @@ -4045,14 +4042,11 @@ int ha_ndbcluster::create(const char 
*name, NdbDictionary::Column::StorageTypeMemory); } -#ifdef NDB_DISKDATA if (info->store_on_disk) if (info->tablespace) tab.setTablespace(info->tablespace); else tab.setTablespace("DEFAULT-TS"); -#endif - // No primary key, create shadow key as 64 bit, auto increment if (form->s->primary_key == MAX_KEY) { @@ -8315,7 +8309,6 @@ bool ha_ndbcluster::check_if_incompatible_data(HA_CREATE_INFO *info, return COMPATIBLE_DATA_YES; } -#ifdef NDB_DISKDATA bool set_up_tablespace(st_alter_tablespace *info, NdbDictionary::Tablespace *ndb_ts) { @@ -8356,21 +8349,25 @@ bool set_up_undofile(st_alter_tablespace *info, return false; } -int ha_ndbcluster::alter_tablespace(st_alter_tablespace *info) +int ndbcluster_alter_tablespace(THD* thd, st_alter_tablespace *info) { - Ndb *ndb; - NDBDICT *dict; - int error; DBUG_ENTER("ha_ndbcluster::alter_tablespace"); - if (check_ndb_connection()) + + Ndb *ndb= check_ndb_in_thd(thd); + if (ndb == NULL) { - DBUG_RETURN(my_errno= HA_ERR_NO_CONNECTION); + DBUG_RETURN(HA_ERR_NO_CONNECTION); } - ndb= get_ndb(); - dict= ndb->getDictionary(); + + NDBDICT *dict = ndb->getDictionary(); + int error; + const char * errmsg; + switch (info->ts_cmd_type){ case (CREATE_TABLESPACE): { + error= ER_CREATE_TABLESPACE_FAILED; + NdbDictionary::Tablespace ndb_ts; NdbDictionary::Datafile ndb_df; if (set_up_tablespace(info, &ndb_ts)) @@ -8381,23 +8378,24 @@ int ha_ndbcluster::alter_tablespace(st_alter_tablespace *info) { DBUG_RETURN(1); } - if (error= dict->createTablespace(ndb_ts)) + errmsg= "TABLESPACE"; + if (dict->createTablespace(ndb_ts)) { DBUG_PRINT("error", ("createTablespace returned %d", error)); - my_error(ER_CREATE_TABLESPACE_FAILED, MYF(0), "TABLESPACE"); - DBUG_RETURN(1); + goto ndberror; } DBUG_PRINT("info", ("Successfully created Tablespace")); - if (error= dict->createDatafile(ndb_df)) + errmsg= "DATAFILE"; + if (dict->createDatafile(ndb_df)) { DBUG_PRINT("error", ("createDatafile returned %d", error)); - my_error(ER_CREATE_TABLESPACE_FAILED, MYF(0), 
"DATAFILE"); - DBUG_RETURN(1); + goto ndberror; } break; } case (ALTER_TABLESPACE): { + error= ER_ALTER_TABLESPACE_FAILED; if (info->ts_alter_tablespace_type == ALTER_TABLESPACE_ADD_FILE) { NdbDictionary::Datafile ndb_df; @@ -8405,11 +8403,10 @@ int ha_ndbcluster::alter_tablespace(st_alter_tablespace *info) { DBUG_RETURN(1); } - if (error= dict->createDatafile(ndb_df)) + errmsg= " CREATE DATAFILE"; + if (dict->createDatafile(ndb_df)) { - DBUG_PRINT("error", ("createDatafile returned %d", error)); - my_error(ER_ALTER_TABLESPACE_FAILED, MYF(0), "CREATE DATAFILE"); - DBUG_RETURN(1); + goto ndberror; } } else if(info->ts_alter_tablespace_type == ALTER_TABLESPACE_DROP_FILE) @@ -8418,11 +8415,10 @@ int ha_ndbcluster::alter_tablespace(st_alter_tablespace *info) info->data_file_name); if (strcmp(df.getPath(), info->data_file_name) == 0) { - if (error= dict->dropDatafile(df)) + errmsg= " DROP DATAFILE"; + if (dict->dropDatafile(df)) { - DBUG_PRINT("error", ("createDatafile returned %d", error)); - my_error(ER_ALTER_TABLESPACE_FAILED, MYF(0), " DROP DATAFILE"); - DBUG_RETURN(1); + goto ndberror; } } else @@ -8442,6 +8438,7 @@ int ha_ndbcluster::alter_tablespace(st_alter_tablespace *info) } case (CREATE_LOGFILE_GROUP): { + error= ER_CREATE_TABLESPACE_FAILED; NdbDictionary::LogfileGroup ndb_lg; NdbDictionary::Undofile ndb_uf; if (info->undo_file_name == NULL) @@ -8455,27 +8452,26 @@ int ha_ndbcluster::alter_tablespace(st_alter_tablespace *info) { DBUG_RETURN(1); } - if (error= dict->createLogfileGroup(ndb_lg)) + errmsg= "LOGFILE GROUP"; + if (dict->createLogfileGroup(ndb_lg)) { - DBUG_PRINT("error", ("createLogfileGroup returned %d", error)); - my_error(ER_CREATE_TABLESPACE_FAILED, MYF(0), "LOGFILE GROUP"); - DBUG_RETURN(1); + goto ndberror; } DBUG_PRINT("info", ("Successfully created Logfile Group")); if (set_up_undofile(info, &ndb_uf)) { DBUG_RETURN(1); } - if (error= dict->createUndofile(ndb_uf)) + errmsg= "UNDOFILE"; + if (dict->createUndofile(ndb_uf)) { - 
DBUG_PRINT("error", ("createUndofile returned %d", error)); - my_error(ER_CREATE_TABLESPACE_FAILED, MYF(0), "UNDOFILE"); - DBUG_RETURN(1); + goto ndberror; } break; } case (ALTER_LOGFILE_GROUP): { + error= ER_ALTER_TABLESPACE_FAILED; if (info->undo_file_name == NULL) { /* @@ -8488,32 +8484,30 @@ int ha_ndbcluster::alter_tablespace(st_alter_tablespace *info) { DBUG_RETURN(1); } - if (error= dict->createUndofile(ndb_uf)) + errmsg= "CREATE UNDOFILE"; + if (dict->createUndofile(ndb_uf)) { - DBUG_PRINT("error", ("createUndofile returned %d", error)); - my_error(ER_ALTER_TABLESPACE_FAILED, MYF(0), "CREATE UNDOFILE"); - DBUG_RETURN(1); + goto ndberror; } break; } case (DROP_TABLESPACE): { - if (error= dict->dropTablespace( - dict->getTablespace(info->tablespace_name))) + error= ER_DROP_TABLESPACE_FAILED; + errmsg= "TABLESPACE"; + if (dict->dropTablespace(dict->getTablespace(info->tablespace_name))) { - DBUG_PRINT("error", ("dropTablespace returned %d", error)); - my_error(ER_DROP_TABLESPACE_FAILED, MYF(0), "TABLESPACE"); - DBUG_RETURN(1); + goto ndberror; } break; } case (DROP_LOGFILE_GROUP): { - if (error= dict->dropLogfileGroup(dict->getLogfileGroup(info->logfile_group_name))) + error= ER_DROP_TABLESPACE_FAILED; + errmsg= "LOGFILE GROUP"; + if (dict->dropLogfileGroup(dict->getLogfileGroup(info->logfile_group_name))) { - DBUG_PRINT("error", ("dropLogfileGroup returned %d", error)); - my_error(ER_DROP_TABLESPACE_FAILED, MYF(0), "LOGFILE GROUP"); - DBUG_RETURN(1); + goto ndberror; } break; } @@ -8531,6 +8525,13 @@ int ha_ndbcluster::alter_tablespace(st_alter_tablespace *info) } } DBUG_RETURN(FALSE); + +ndberror: + const NdbError err= dict->getNdbError(); + ERR_PRINT(err); + ndb_to_mysql_error(&err); + + my_error(error, MYF(0), errmsg); + DBUG_RETURN(1); } -#endif /* NDB_DISKDATA */ diff --git a/sql/ha_ndbcluster.h b/sql/ha_ndbcluster.h index 01d14786600..f05c1c32a1a 100644 --- a/sql/ha_ndbcluster.h +++ b/sql/ha_ndbcluster.h @@ -511,6 +511,7 @@ class ha_ndbcluster: public 
handler bool eq_range, bool sorted, byte* buf); int read_range_next(); + int alter_tablespace(st_alter_tablespace *info); /** * Multi range stuff diff --git a/sql/ha_partition.cc b/sql/ha_partition.cc index ce483bb5c57..d7549c1a95b 100644 --- a/sql/ha_partition.cc +++ b/sql/ha_partition.cc @@ -96,6 +96,7 @@ handlerton partition_hton = { NULL, /* Start Consistent Snapshot */ NULL, /* Flush logs */ NULL, /* Show status */ + NULL, /* Alter Tablespace */ HTON_NOT_USER_SELECTABLE | HTON_HIDDEN }; diff --git a/sql/handler.h b/sql/handler.h index 36449fb1307..f6680679a35 100644 --- a/sql/handler.h +++ b/sql/handler.h @@ -315,6 +315,82 @@ typedef struct xid_t XID; #define MAX_XID_LIST_SIZE (1024*128) #endif +/* + These structures are used to pass information from a set of SQL commands + on add/drop/change tablespace definitions to the proper hton. +*/ +#define UNDEF_NODEGROUP 65535 +enum ts_command_type +{ + TS_CMD_NOT_DEFINED = -1, + CREATE_TABLESPACE = 0, + ALTER_TABLESPACE = 1, + CREATE_LOGFILE_GROUP = 2, + ALTER_LOGFILE_GROUP = 3, + DROP_TABLESPACE = 4, + DROP_LOGFILE_GROUP = 5, + CHANGE_FILE_TABLESPACE = 6, + ALTER_ACCESS_MODE_TABLESPACE = 7 +}; + +enum ts_alter_tablespace_type +{ + TS_ALTER_TABLESPACE_TYPE_NOT_DEFINED = -1, + ALTER_TABLESPACE_ADD_FILE = 1, + ALTER_TABLESPACE_DROP_FILE = 2 +}; + +enum tablespace_access_mode +{ + TS_NOT_DEFINED= -1, + TS_READ_ONLY = 0, + TS_READ_WRITE = 1, + TS_NOT_ACCESSIBLE = 2 +}; + +class st_alter_tablespace : public Sql_alloc +{ + public: + const char *tablespace_name; + const char *logfile_group_name; + enum ts_command_type ts_cmd_type; + enum ts_alter_tablespace_type ts_alter_tablespace_type; + const char *data_file_name; + const char *undo_file_name; + const char *redo_file_name; + ulonglong extent_size; + ulonglong undo_buffer_size; + ulonglong redo_buffer_size; + ulonglong initial_size; + ulonglong autoextend_size; + ulonglong max_size; + uint nodegroup_id; + enum legacy_db_type storage_engine; + bool wait_until_completed; + 
const char *ts_comment; + enum tablespace_access_mode ts_access_mode; + st_alter_tablespace() + { + tablespace_name= NULL; + logfile_group_name= "DEFAULT_LG"; //Default log file group + ts_cmd_type= TS_CMD_NOT_DEFINED; + data_file_name= NULL; + undo_file_name= NULL; + redo_file_name= NULL; + extent_size= 1024*1024; //Default 1 MByte + undo_buffer_size= 8*1024*1024; //Default 8 MByte + redo_buffer_size= 8*1024*1024; //Default 8 MByte + initial_size= 128*1024*1024; //Default 128 MByte + autoextend_size= 0; //No autoextension as default + max_size= 0; //Max size == initial size => no extension + storage_engine= DB_TYPE_UNKNOWN; + nodegroup_id= UNDEF_NODEGROUP; + wait_until_completed= TRUE; + ts_comment= NULL; + ts_access_mode= TS_NOT_DEFINED; + } +}; + /* The handler for a table type. Will be included in the TABLE structure */ struct st_table; @@ -434,6 +510,7 @@ typedef struct int (*start_consistent_snapshot)(THD *thd); bool (*flush_logs)(); bool (*show_status)(THD *thd, stat_print_fn *print, enum ha_stat_type stat); + int (*alter_tablespace)(THD *thd, st_alter_tablespace *ts_info); uint32 flags; /* global handler flags */ } handlerton; @@ -732,7 +809,7 @@ typedef struct st_ha_create_information { CHARSET_INFO *table_charset, *default_table_charset; LEX_STRING connect_string; - const char *comment,*password; + const char *comment,*password, *tablespace; const char *data_file_name, *index_file_name; const char *alias; ulonglong max_rows,min_rows; @@ -752,6 +829,7 @@ typedef struct st_ha_create_information bool table_existed; /* 1 in create if table existed */ bool frm_only; /* 1 if no ha_create_table() */ bool varchar; /* 1 if table has a VARCHAR */ + bool store_on_disk; /* 1 if table stored on disk */ } HA_CREATE_INFO; @@ -830,7 +908,6 @@ typedef struct st_handler_buffer byte *end_of_used_area; /* End of area that was used by handler */ } HANDLER_BUFFER; - class handler :public Sql_alloc { #ifdef WITH_PARTITION_STORAGE_ENGINE diff --git a/sql/lex.h b/sql/lex.h index 
cf83fc9488c..93d16856cf1 100644 --- a/sql/lex.h +++ b/sql/lex.h @@ -59,6 +59,7 @@ static SYMBOL symbols[] = { { "<<", SYM(SHIFT_LEFT)}, { ">>", SYM(SHIFT_RIGHT)}, { "<=>", SYM(EQUAL_SYM)}, + { "ACCESSIBLE", SYM(ACCESSIBLE_SYM)}, { "ACTION", SYM(ACTION)}, { "ADD", SYM(ADD)}, { "AFTER", SYM(AFTER_SYM)}, @@ -76,6 +77,7 @@ static SYMBOL symbols[] = { { "ASENSITIVE", SYM(ASENSITIVE_SYM)}, { "AUTHORS", SYM(AUTHORS_SYM)}, { "AUTO_INCREMENT", SYM(AUTO_INC)}, + { "AUTOEXTEND_SIZE", SYM(AUTOEXTEND_SIZE_SYM)}, { "AVG", SYM(AVG_SYM)}, { "AVG_ROW_LENGTH", SYM(AVG_ROW_LENGTH)}, { "BACKUP", SYM(BACKUP_SYM)}, @@ -141,6 +143,7 @@ static SYMBOL symbols[] = { { "DATA", SYM(DATA_SYM)}, { "DATABASE", SYM(DATABASE)}, { "DATABASES", SYM(DATABASES)}, + { "DATAFILE", SYM(DATAFILE_SYM)}, { "DATE", SYM(DATE_SYM)}, { "DATETIME", SYM(DATETIME)}, { "DAY", SYM(DAY_SYM)}, @@ -164,6 +167,7 @@ static SYMBOL symbols[] = { { "DIRECTORY", SYM(DIRECTORY_SYM)}, { "DISABLE", SYM(DISABLE_SYM)}, { "DISCARD", SYM(DISCARD)}, + { "DISK", SYM(DISK_SYM)}, { "DISTINCT", SYM(DISTINCT)}, { "DISTINCTROW", SYM(DISTINCT)}, /* Access likes this */ { "DIV", SYM(DIV_SYM)}, @@ -193,6 +197,7 @@ static SYMBOL symbols[] = { { "EXPANSION", SYM(EXPANSION_SYM)}, { "EXPLAIN", SYM(DESCRIBE)}, { "EXTENDED", SYM(EXTENDED_SYM)}, + { "EXTENT_SIZE", SYM(EXTENT_SIZE_SYM)}, { "FALSE", SYM(FALSE_SYM)}, { "FAST", SYM(FAST_SYM)}, { "FETCH", SYM(FETCH_SYM)}, @@ -241,6 +246,7 @@ static SYMBOL symbols[] = { { "INDEX", SYM(INDEX_SYM)}, { "INDEXES", SYM(INDEXES)}, { "INFILE", SYM(INFILE)}, + { "INITIAL_SIZE", SYM(INITIAL_SIZE_SYM)}, { "INNER", SYM(INNER_SYM)}, { "INNOBASE", SYM(INNOBASE_SYM)}, { "INNODB", SYM(INNOBASE_SYM)}, @@ -292,6 +298,7 @@ static SYMBOL symbols[] = { { "LOCALTIMESTAMP", SYM(NOW_SYM)}, { "LOCK", SYM(LOCK_SYM)}, { "LOCKS", SYM(LOCKS_SYM)}, + { "LOGFILE", SYM(LOGFILE_SYM)}, { "LOGS", SYM(LOGS_SYM)}, { "LONG", SYM(LONG_SYM)}, { "LONGBLOB", SYM(LONGBLOB)}, @@ -317,6 +324,7 @@ static SYMBOL symbols[] = { { 
"MAX_CONNECTIONS_PER_HOUR", SYM(MAX_CONNECTIONS_PER_HOUR)}, { "MAX_QUERIES_PER_HOUR", SYM(MAX_QUERIES_PER_HOUR)}, { "MAX_ROWS", SYM(MAX_ROWS)}, + { "MAX_SIZE", SYM(MAX_SIZE_SYM)}, { "MAX_UPDATES_PER_HOUR", SYM(MAX_UPDATES_PER_HOUR)}, { "MAX_USER_CONNECTIONS", SYM(MAX_USER_CONNECTIONS_SYM)}, { "MAXVALUE", SYM(MAX_VALUE_SYM)}, @@ -324,6 +332,7 @@ static SYMBOL symbols[] = { { "MEDIUMBLOB", SYM(MEDIUMBLOB)}, { "MEDIUMINT", SYM(MEDIUMINT)}, { "MEDIUMTEXT", SYM(MEDIUMTEXT)}, + { "MEMORY", SYM(MEMORY_SYM)}, { "MERGE", SYM(MERGE_SYM)}, { "MICROSECOND", SYM(MICROSECOND_SYM)}, { "MIDDLEINT", SYM(MEDIUMINT)}, /* For powerbuilder */ @@ -351,7 +360,8 @@ static SYMBOL symbols[] = { { "NEW", SYM(NEW_SYM)}, { "NEXT", SYM(NEXT_SYM)}, { "NO", SYM(NO_SYM)}, - { "NODEGROUP", SYM(NODEGROUP_SYM)}, + { "NO_WAIT", SYM(NO_WAIT_SYM)}, + { "NODEGROUP", SYM(NODEGROUP_SYM)}, { "NONE", SYM(NONE_SYM)}, { "NOT", SYM(NOT_SYM)}, { "NO_WRITE_TO_BINLOG", SYM(NO_WRITE_TO_BINLOG)}, @@ -400,9 +410,13 @@ static SYMBOL symbols[] = { { "RAID_TYPE", SYM(RAID_TYPE)}, { "RANGE", SYM(RANGE_SYM)}, { "READ", SYM(READ_SYM)}, + { "READ_ONLY", SYM(READ_ONLY_SYM)}, + { "READ_WRITE", SYM(READ_WRITE_SYM)}, { "READS", SYM(READS_SYM)}, { "REAL", SYM(REAL)}, { "RECOVER", SYM(RECOVER_SYM)}, + { "REDO_BUFFER_SIZE", SYM(REDO_BUFFER_SIZE_SYM)}, + { "REDOFILE", SYM(REDOFILE_SYM)}, { "REDUNDANT", SYM(REDUNDANT_SYM)}, { "REFERENCES", SYM(REFERENCES)}, { "REGEXP", SYM(REGEXP)}, @@ -522,6 +536,8 @@ static SYMBOL symbols[] = { { "TYPES", SYM(TYPES_SYM)}, { "UNCOMMITTED", SYM(UNCOMMITTED_SYM)}, { "UNDEFINED", SYM(UNDEFINED_SYM)}, + { "UNDO_BUFFER_SIZE", SYM(UNDO_BUFFER_SIZE_SYM)}, + { "UNDOFILE", SYM(UNDOFILE_SYM)}, { "UNDO", SYM(UNDO_SYM)}, { "UNICODE", SYM(UNICODE_SYM)}, { "UNION", SYM(UNION_SYM)}, @@ -548,6 +564,7 @@ static SYMBOL symbols[] = { { "VARCHARACTER", SYM(VARCHAR)}, { "VARIABLES", SYM(VARIABLES)}, { "VARYING", SYM(VARYING)}, + { "WAIT", SYM(WAIT_SYM)}, { "WARNINGS", SYM(WARNINGS)}, { "WEEK", SYM(WEEK_SYM)}, { "WHEN", 
SYM(WHEN_SYM)}, diff --git a/sql/log.cc b/sql/log.cc index 45ce7776d1a..fe95419fffd 100644 --- a/sql/log.cc +++ b/sql/log.cc @@ -88,6 +88,7 @@ handlerton binlog_hton = { NULL, /* Start Consistent Snapshot */ NULL, /* Flush logs */ NULL, /* Show status */ + NULL, /* Alter Tablespace */ HTON_NOT_USER_SELECTABLE | HTON_HIDDEN }; diff --git a/sql/mysql_priv.h b/sql/mysql_priv.h index ffee875a7e9..b3bc49b31d1 100644 --- a/sql/mysql_priv.h +++ b/sql/mysql_priv.h @@ -691,6 +691,7 @@ TABLE *create_virtual_tmp_table(THD *thd, List<create_field> &field_list); bool mysql_xa_recover(THD *thd); bool check_simple_select(); +int mysql_alter_tablespace(THD* thd, st_alter_tablespace *ts_info); SORT_FIELD * make_unireg_sortorder(ORDER *order, uint *length); int setup_order(THD *thd, Item **ref_pointer_array, TABLE_LIST *tables, diff --git a/sql/share/errmsg.txt b/sql/share/errmsg.txt index 4bd40978893..7f76686f953 100644 --- a/sql/share/errmsg.txt +++ b/sql/share/errmsg.txt @@ -5727,6 +5727,20 @@ ER_WRONG_VALUE eng "Incorrect %-.32s value: '%-.128s'" ER_NO_PARTITION_FOR_GIVEN_VALUE eng "Table has no partition for value %ld" +ER_TABLESPACE_OPTION_ONLY_ONCE + eng "It is not allowed to specify %s more than once" +ER_CREATE_TABLESPACE_FAILED + eng "Failed to create %s" +ER_DROP_TABLESPACE_FAILED + eng "Failed to drop %s" +ER_TABLESPACE_AUTO_EXTEND_ERROR + eng "The handler doesn't support autoextend of tablespaces" +ER_WRONG_SIZE_NUMBER + eng "A size parameter was incorrectly specified, either number or on the form 10M" +ER_SIZE_OVERFLOW_ERROR + eng "The size number was correct but we don't allow the digit part to be more than 2 billion" +ER_ALTER_TABLESPACE_FAILED + eng "Failed to alter: %s" ER_BINLOG_ROW_LOGGING_FAILED eng "Writing one row to the row-based binary log failed" ER_BINLOG_ROW_WRONG_TABLE_DEF diff --git a/sql/sql_lex.h b/sql/sql_lex.h index eb2be2691b3..f239b7825ef 100644 --- a/sql/sql_lex.h +++ b/sql/sql_lex.h @@ -25,6 +25,7 @@ class sp_head; class sp_name; class sp_instr; 
class sp_pcontext; +class st_alter_tablespace; class partition_info; /* @@ -92,6 +93,7 @@ enum enum_sql_command { SQLCOM_XA_START, SQLCOM_XA_END, SQLCOM_XA_PREPARE, SQLCOM_XA_COMMIT, SQLCOM_XA_ROLLBACK, SQLCOM_XA_RECOVER, SQLCOM_SHOW_PROC_CODE, SQLCOM_SHOW_FUNC_CODE, + SQLCOM_ALTER_TABLESPACE, SQLCOM_INSTALL_PLUGIN, SQLCOM_UNINSTALL_PLUGIN, SQLCOM_SHOW_AUTHORS, SQLCOM_BINLOG_BASE64_EVENT, SQLCOM_SHOW_PLUGINS, @@ -952,6 +954,12 @@ typedef struct st_lex during replication ("LOCAL 'filename' REPLACE INTO" part). */ const uchar *fname_start, *fname_end; + + /* + Reference to a struct that contains information in various commands + to add/create/drop/change table spaces. + */ + st_alter_tablespace *alter_tablespace_info; bool escape_used; diff --git a/sql/sql_parse.cc b/sql/sql_parse.cc index 1b36ca8dc97..57682d82880 100644 --- a/sql/sql_parse.cc +++ b/sql/sql_parse.cc @@ -4818,6 +4818,12 @@ end_with_restore_list: case SQLCOM_XA_RECOVER: res= mysql_xa_recover(thd); break; + case SQLCOM_ALTER_TABLESPACE: + if (check_access(thd, ALTER_ACL, thd->db, 0, 1, 0, thd->db ? is_schema_db(thd->db) : 0)) + break; + if (!(res= mysql_alter_tablespace(thd, lex->alter_tablespace_info))) + send_ok(thd); + break; case SQLCOM_INSTALL_PLUGIN: if (! (res= mysql_install_plugin(thd, &thd->lex->comment, &thd->lex->ident))) diff --git a/sql/sql_tablespace.cc b/sql/sql_tablespace.cc new file mode 100644 index 00000000000..0c99180365e --- /dev/null +++ b/sql/sql_tablespace.cc @@ -0,0 +1,50 @@ +/* Copyright (C) 2000-2004 MySQL AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* drop and alter of tablespaces */ + +#include "mysql_priv.h" + +int mysql_alter_tablespace(THD *thd, st_alter_tablespace *ts_info) +{ + int error= HA_ADMIN_NOT_IMPLEMENTED; + handlerton *hton; + + DBUG_ENTER("mysql_alter_tablespace"); + /* + If the user haven't defined an engine, this will fallback to using the + default storage engine. + */ + hton= ha_resolve_by_legacy_type(thd, ts_info->storage_engine); + + if (hton->alter_tablespace && (error= hton->alter_tablespace(thd, ts_info))) + { + if (error == HA_ADMIN_NOT_IMPLEMENTED) + { + my_error(ER_CHECK_NOT_IMPLEMENTED, MYF(0), ""); + } + else if (error == 1) + { + DBUG_RETURN(1); + } + else + { + my_error(error, MYF(0)); + } + DBUG_RETURN(error); + } + DBUG_RETURN(FALSE); +} diff --git a/sql/sql_yacc.yy b/sql/sql_yacc.yy index 7ebe851fc85..f8a1548ecb8 100644 --- a/sql/sql_yacc.yy +++ b/sql/sql_yacc.yy @@ -119,6 +119,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); %token END_OF_INPUT %token ABORT_SYM +%token ACCESSIBLE_SYM %token ACTION %token ADD %token ADDDATE_SYM @@ -139,6 +140,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); %token ATAN %token AUTHORS_SYM %token AUTO_INC +%token AUTOEXTEND_SIZE_SYM %token AVG_ROW_LENGTH %token AVG_SYM %token BACKUP_SYM @@ -208,6 +210,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); %token CURTIME %token DATABASE %token DATABASES +%token DATAFILE_SYM %token DATA_SYM %token DATETIME %token DATE_ADD_INTERVAL @@ -237,6 +240,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); %token DIRECTORY_SYM %token DISABLE_SYM %token DISCARD +%token DISK_SYM %token DISTINCT %token DIV_SYM %token DOUBLE_SYM @@ -269,6 +273,7 @@ bool my_yyoverflow(short **a, 
YYSTYPE **b, ulong *yystacksize); %token EXPANSION_SYM %token EXPORT_SET %token EXTENDED_SYM +%token EXTENT_SIZE_SYM %token EXTRACT_SYM %token FALSE_SYM %token FAST_SYM @@ -331,6 +336,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); %token INDEXES %token INDEX_SYM %token INFILE +%token INITIAL_SIZE_SYM %token INNER_SYM %token INNOBASE_SYM %token INOUT_SYM @@ -377,6 +383,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); %token LOCATOR_SYM %token LOCKS_SYM %token LOCK_SYM +%token LOGFILE_SYM %token LOGS_SYM %token LOG_SYM %token LONGBLOB @@ -407,6 +414,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); %token MAX_CONNECTIONS_PER_HOUR %token MAX_QUERIES_PER_HOUR %token MAX_ROWS +%token MAX_SIZE_SYM %token MAX_SYM %token MAX_UPDATES_PER_HOUR %token MAX_USER_CONNECTIONS_SYM @@ -415,6 +423,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); %token MEDIUMINT %token MEDIUMTEXT %token MEDIUM_SYM +%token MEMORY_SYM %token MERGE_SYM %token MICROSECOND_SYM %token MIGRATE_SYM @@ -451,6 +460,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); %token NOT_SYM %token NOW_SYM %token NO_SYM +%token NO_WAIT_SYM %token NO_WRITE_TO_BINLOG %token NULL_SYM %token NUM @@ -506,9 +516,13 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); %token RAND %token RANGE_SYM %token READS_SYM +%token READ_ONLY_SYM %token READ_SYM +%token READ_WRITE_SYM %token REAL %token RECOVER_SYM +%token REDO_BUFFER_SIZE_SYM +%token REDOFILE_SYM %token REDUNDANT_SYM %token REFERENCES %token REGEXP @@ -630,6 +644,8 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); %token ULONGLONG_NUM %token UNCOMMITTED_SYM %token UNDEFINED_SYM +%token UNDO_BUFFER_SIZE_SYM +%token UNDOFILE_SYM %token UNDERSCORE_CHARSET %token UNDO_SYM %token UNICODE_SYM @@ -660,6 +676,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); %token VARIANCE_SYM %token VARYING %token VIEW_SYM +%token WAIT_SYM %token 
WARNINGS %token WEEK_SYM %token WHEN_SYM @@ -727,7 +744,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); ulong_num raid_types merge_insert_types %type <ulonglong_number> - ulonglong_num + ulonglong_num size_number %type <longlong_number> part_bit_expr @@ -1296,6 +1313,16 @@ create: { Lex->sql_command = SQLCOM_CREATE_USER; } + | CREATE LOGFILE_SYM GROUP logfile_group_info + { + LEX *lex= Lex; + lex->alter_tablespace_info->ts_cmd_type= CREATE_LOGFILE_GROUP; + } + | CREATE TABLESPACE tablespace_info + { + LEX *lex= Lex; + lex->alter_tablespace_info->ts_cmd_type= CREATE_TABLESPACE; + } ; clear_privileges: @@ -2538,6 +2565,382 @@ trg_event: | DELETE_SYM { Lex->trg_chistics.event= TRG_EVENT_DELETE; } ; +/* + This part of the parser contains common code for all TABLESPACE + commands. + CREATE TABLESPACE name ... + ALTER TABLESPACE name CHANGE DATAFILE ... + ALTER TABLESPACE name ADD DATAFILE ... + ALTER TABLESPACE name access_mode + CREATE LOGFILE GROUP name ... + ALTER LOGFILE GROUP name ADD UNDOFILE .. + ALTER LOGFILE GROUP name ADD REDOFILE .. 
+ DROP TABLESPACE name + DROP LOGFILE GROUP name +*/ +change_tablespace_access: + tablespace_name + ts_access_mode + ; + +change_tablespace_info: + tablespace_name + CHANGE ts_datafile + change_ts_option_list + ; + +tablespace_info: + tablespace_name + ADD ts_datafile + opt_logfile_group_name + tablespace_option_list + ; + +opt_logfile_group_name: + /* empty */ {} + | USE_SYM LOGFILE_SYM GROUP ident + { + LEX *lex= Lex; + lex->alter_tablespace_info->logfile_group_name= $4.str; + }; + +alter_tablespace_info: + tablespace_name + ADD ts_datafile + alter_tablespace_option_list + { + Lex->alter_tablespace_info->ts_alter_tablespace_type= ALTER_TABLESPACE_ADD_FILE; + } + | + tablespace_name + DROP ts_datafile + alter_tablespace_option_list + { + Lex->alter_tablespace_info->ts_alter_tablespace_type= ALTER_TABLESPACE_DROP_FILE; + }; + +logfile_group_info: + logfile_group_name + add_log_file + logfile_group_option_list + ; + +alter_logfile_group_info: + logfile_group_name + add_log_file + alter_logfile_group_option_list + ; + +add_log_file: + ADD lg_undofile + | ADD lg_redofile + ; + +change_ts_option_list: + /* empty */ {} + change_ts_options + ; + +change_ts_options: + change_ts_option + | change_ts_options change_ts_option + | change_ts_options ',' change_ts_option + ; + +change_ts_option: + opt_ts_initial_size + | opt_ts_autoextend_size + | opt_ts_max_size + ; + +tablespace_option_list: + /* empty */ {} + tablespace_options + ; + +tablespace_options: + tablespace_option + | tablespace_options tablespace_option + | tablespace_options ',' tablespace_option + ; + +tablespace_option: + opt_ts_initial_size + | opt_ts_autoextend_size + | opt_ts_max_size + | opt_ts_extent_size + | opt_ts_nodegroup + | opt_ts_engine + | ts_wait + | opt_ts_comment + ; + +alter_tablespace_option_list: + /* empty */ {} + alter_tablespace_options + ; + +alter_tablespace_options: + alter_tablespace_option + | alter_tablespace_options alter_tablespace_option + | alter_tablespace_options ',' 
alter_tablespace_option + ; + +alter_tablespace_option: + opt_ts_initial_size + | opt_ts_autoextend_size + | opt_ts_max_size + | opt_ts_engine + | ts_wait + ; + +logfile_group_option_list: + /* empty */ {} + logfile_group_options + ; + +logfile_group_options: + logfile_group_option + | logfile_group_options logfile_group_option + | logfile_group_options ',' logfile_group_option + ; + +logfile_group_option: + opt_ts_initial_size + | opt_ts_undo_buffer_size + | opt_ts_redo_buffer_size + | opt_ts_nodegroup + | opt_ts_engine + | ts_wait + | opt_ts_comment + ; + +alter_logfile_group_option_list: + /* empty */ {} + alter_logfile_group_options + ; + +alter_logfile_group_options: + alter_logfile_group_option + | alter_logfile_group_options alter_logfile_group_option + | alter_logfile_group_options ',' alter_logfile_group_option + ; + +alter_logfile_group_option: + opt_ts_initial_size + | opt_ts_engine + | ts_wait + ; + + +ts_datafile: + DATAFILE_SYM TEXT_STRING_sys + { + LEX *lex= Lex; + lex->alter_tablespace_info->data_file_name= $2.str; + }; + +lg_undofile: + UNDOFILE_SYM TEXT_STRING_sys + { + LEX *lex= Lex; + lex->alter_tablespace_info->undo_file_name= $2.str; + }; + +lg_redofile: + REDOFILE_SYM TEXT_STRING_sys + { + LEX *lex= Lex; + lex->alter_tablespace_info->redo_file_name= $2.str; + }; + +tablespace_name: + ident + { + LEX *lex= Lex; + lex->alter_tablespace_info= new st_alter_tablespace(); + lex->alter_tablespace_info->tablespace_name= $1.str; + lex->sql_command= SQLCOM_ALTER_TABLESPACE; + }; + +logfile_group_name: + ident + { + LEX *lex= Lex; + lex->alter_tablespace_info= new st_alter_tablespace(); + lex->alter_tablespace_info->logfile_group_name= $1.str; + lex->sql_command= SQLCOM_ALTER_TABLESPACE; + }; + +ts_access_mode: + READ_ONLY_SYM + { + LEX *lex= Lex; + lex->alter_tablespace_info->ts_access_mode= TS_READ_ONLY; + } + | READ_WRITE_SYM + { + LEX *lex= Lex; + lex->alter_tablespace_info->ts_access_mode= TS_READ_WRITE; + } + | NOT_SYM ACCESSIBLE_SYM + { + LEX 
*lex= Lex; + lex->alter_tablespace_info->ts_access_mode= TS_NOT_ACCESSIBLE; + }; + +opt_ts_initial_size: + INITIAL_SIZE_SYM opt_equal size_number + { + LEX *lex= Lex; + lex->alter_tablespace_info->initial_size= $3; + }; + +opt_ts_autoextend_size: + AUTOEXTEND_SIZE_SYM opt_equal size_number + { + LEX *lex= Lex; + lex->alter_tablespace_info->autoextend_size= $3; + }; + +opt_ts_max_size: + MAX_SIZE_SYM opt_equal size_number + { + LEX *lex= Lex; + lex->alter_tablespace_info->max_size= $3; + }; + +opt_ts_extent_size: + EXTENT_SIZE_SYM opt_equal size_number + { + LEX *lex= Lex; + lex->alter_tablespace_info->extent_size= $3; + }; + +opt_ts_undo_buffer_size: + UNDO_BUFFER_SIZE_SYM opt_equal size_number + { + LEX *lex= Lex; + lex->alter_tablespace_info->undo_buffer_size= $3; + }; + +opt_ts_redo_buffer_size: + REDO_BUFFER_SIZE_SYM opt_equal size_number + { + LEX *lex= Lex; + lex->alter_tablespace_info->redo_buffer_size= $3; + }; + +opt_ts_nodegroup: + NODEGROUP_SYM opt_equal ulong_num + { + LEX *lex= Lex; + if (lex->alter_tablespace_info->nodegroup_id != UNDEF_NODEGROUP) + { + my_error(ER_TABLESPACE_OPTION_ONLY_ONCE,MYF(0),"NODEGROUP"); + YYABORT; + } + lex->alter_tablespace_info->nodegroup_id= $3; + }; + +opt_ts_comment: + COMMENT_SYM opt_equal TEXT_STRING_sys + { + LEX *lex= Lex; + if (lex->alter_tablespace_info->ts_comment != NULL) + { + my_error(ER_TABLESPACE_OPTION_ONLY_ONCE,MYF(0),"COMMENT"); + YYABORT; + } + lex->alter_tablespace_info->ts_comment= $3.str; + }; + +opt_ts_engine: + opt_storage ENGINE_SYM opt_equal storage_engines + { + LEX *lex= Lex; + if (lex->alter_tablespace_info->storage_engine != DB_TYPE_UNKNOWN) + { + my_error(ER_TABLESPACE_OPTION_ONLY_ONCE,MYF(0), + "STORAGE ENGINE"); + YYABORT; + } + lex->alter_tablespace_info->storage_engine= $4->db_type; + }; + +opt_ts_wait: + /* empty */ + | ts_wait + ; + +ts_wait: + WAIT_SYM + { + LEX *lex= Lex; + lex->alter_tablespace_info->wait_until_completed= TRUE; + } + | NO_WAIT_SYM + { + LEX *lex= Lex; + if 
(!(lex->alter_tablespace_info->wait_until_completed)) + { + my_error(ER_TABLESPACE_OPTION_ONLY_ONCE,MYF(0),"NO_WAIT"); + YYABORT; + } + lex->alter_tablespace_info->wait_until_completed= FALSE; + }; + +size_number: + ulong_num { $$= $1;} + | IDENT + { + ulonglong number, test_number; + uint text_shift_number= 0; + longlong prefix_number; + char *end_ptr; + char *start_ptr= $1.str; + uint str_len= strlen(start_ptr); + int error; + prefix_number= my_strtoll10(start_ptr, &end_ptr, &error); + if ((start_ptr + str_len - 1) == end_ptr) + { + switch (end_ptr[0]) + { + case 'g': + case 'G': + text_shift_number+=10; + case 'm': + case 'M': + text_shift_number+=10; + case 'k': + case 'K': + text_shift_number+=10; + break; + default: + { + my_error(ER_WRONG_SIZE_NUMBER, MYF(0)); + YYABORT; + } + } + if (prefix_number >> 31) + { + my_error(ER_SIZE_OVERFLOW_ERROR, MYF(0)); + YYABORT; + } + number= prefix_number << text_shift_number; + } + else + { + my_error(ER_WRONG_SIZE_NUMBER, MYF(0)); + YYABORT; + } + $$= number; + } + ; + +/* + End tablespace part +*/ create2: '(' create2a {} @@ -3196,6 +3599,9 @@ create_table_option: | INSERT_METHOD opt_equal merge_insert_types { Lex->create_info.merge_insert_method= $3; Lex->create_info.used_fields|= HA_CREATE_USED_INSERT_METHOD;} | DATA_SYM DIRECTORY_SYM opt_equal TEXT_STRING_sys { Lex->create_info.data_file_name= $4.str; Lex->create_info.used_fields|= HA_CREATE_USED_DATADIR; } | INDEX_SYM DIRECTORY_SYM opt_equal TEXT_STRING_sys { Lex->create_info.index_file_name= $4.str; Lex->create_info.used_fields|= HA_CREATE_USED_INDEXDIR; } + | TABLESPACE ident {Lex->create_info.tablespace= $2.str;} + | STORAGE_SYM DISK_SYM {Lex->create_info.store_on_disk= TRUE;} + | STORAGE_SYM MEMORY_SYM {Lex->create_info.store_on_disk= FALSE;} | CONNECTION_SYM opt_equal TEXT_STRING_sys { Lex->create_info.connect_string.str= $3.str; Lex->create_info.connect_string.length= $3.length; Lex->create_info.used_fields|= HA_CREATE_USED_CONNECTION; } ; @@ -3954,6 +4360,26 
@@ alter: } view_list_opt AS view_select view_check_option {} + | ALTER TABLESPACE alter_tablespace_info + { + LEX *lex= Lex; + lex->alter_tablespace_info->ts_cmd_type= ALTER_TABLESPACE; + } + | ALTER LOGFILE_SYM GROUP alter_logfile_group_info + { + LEX *lex= Lex; + lex->alter_tablespace_info->ts_cmd_type= ALTER_LOGFILE_GROUP; + } + | ALTER TABLESPACE change_tablespace_info + { + LEX *lex= Lex; + lex->alter_tablespace_info->ts_cmd_type= CHANGE_FILE_TABLESPACE; + } + | ALTER TABLESPACE change_tablespace_access + { + LEX *lex= Lex; + lex->alter_tablespace_info->ts_cmd_type= ALTER_ACCESS_MODE_TABLESPACE; + } ; ident_or_empty: @@ -6652,6 +7078,16 @@ drop: LEX *lex= Lex; lex->sql_command= SQLCOM_DROP_TRIGGER; lex->spname= $3; + } + | DROP TABLESPACE tablespace_name opt_ts_engine opt_ts_wait + { + LEX *lex= Lex; + lex->alter_tablespace_info->ts_cmd_type= DROP_TABLESPACE; + } + | DROP LOGFILE_SYM GROUP logfile_group_name opt_ts_engine opt_ts_wait + { + LEX *lex= Lex; + lex->alter_tablespace_info->ts_cmd_type= DROP_LOGFILE_GROUP; } ; @@ -8105,6 +8541,12 @@ TEXT_STRING_literal: ident: IDENT_sys { $$=$1; } + | READ_ONLY_SYM + { + THD *thd= YYTHD; + $$.str= thd->strmake("read_only",9); + $$.length= 9; + } | keyword { THD *thd= YYTHD; @@ -8227,6 +8669,7 @@ keyword_sp: | ALGORITHM_SYM {} | ANY_SYM {} | AUTO_INC {} + | AUTOEXTEND_SIZE_SYM {} | AVG_ROW_LENGTH {} | AVG_SYM {} | BERKELEY_DB_SYM {} @@ -8251,6 +8694,7 @@ keyword_sp: | CONSISTENT_SYM {} | CUBE_SYM {} | DATA_SYM {} + | DATAFILE_SYM {} | DATETIME {} | DATE_SYM {} | DAY_SYM {} @@ -8259,6 +8703,7 @@ keyword_sp: | DES_KEY_FILE {} | DIRECTORY_SYM {} | DISCARD {} + | DISK_SYM {} | DUMPFILE {} | DUPLICATE_SYM {} | DYNAMIC_SYM {} @@ -8270,6 +8715,7 @@ keyword_sp: | EVENTS_SYM {} | EXPANSION_SYM {} | EXTENDED_SYM {} + | EXTENT_SIZE_SYM {} | FAST_SYM {} | FOUND_SYM {} | DISABLE_SYM {} @@ -8291,6 +8737,7 @@ keyword_sp: | INVOKER_SYM {} | IMPORT {} | INDEXES {} + | INITIAL_SIZE_SYM {} | ISOLATION {} | ISSUER_SYM {} | INNOBASE_SYM 
{} @@ -8304,6 +8751,7 @@ keyword_sp: | LIST_SYM {} | LOCAL_SYM {} | LOCKS_SYM {} + | LOGFILE_SYM {} | LOGS_SYM {} | MAX_ROWS {} | MASTER_SYM {} @@ -8323,10 +8771,12 @@ keyword_sp: | MASTER_SSL_KEY_SYM {} | MAX_CONNECTIONS_PER_HOUR {} | MAX_QUERIES_PER_HOUR {} + | MAX_SIZE_SYM {} | MAX_UPDATES_PER_HOUR {} | MAX_USER_CONNECTIONS_SYM {} | MAX_VALUE_SYM {} | MEDIUM_SYM {} + | MEMORY_SYM {} | MERGE_SYM {} | MICROSECOND_SYM {} | MIGRATE_SYM {} @@ -8346,7 +8796,8 @@ keyword_sp: | NDBCLUSTER_SYM {} | NEXT_SYM {} | NEW_SYM {} - | NODEGROUP_SYM {} + | NO_WAIT_SYM {} + | NODEGROUP_SYM {} | NONE_SYM {} | NVARCHAR_SYM {} | OFFSET_SYM {} @@ -8373,6 +8824,8 @@ keyword_sp: | RAID_STRIPED_SYM {} | RAID_TYPE {} | RECOVER_SYM {} + | REDO_BUFFER_SIZE_SYM {} + | REDOFILE_SYM {} | REDUNDANT_SYM {} | RELAY_LOG_FILE_SYM {} | RELAY_LOG_POS_SYM {} @@ -8429,6 +8882,8 @@ keyword_sp: | FUNCTION_SYM {} | UNCOMMITTED_SYM {} | UNDEFINED_SYM {} + | UNDO_BUFFER_SIZE_SYM {} + | UNDOFILE_SYM {} | UNKNOWN_SYM {} | UNTIL_SYM {} | USER {} @@ -8437,6 +8892,7 @@ keyword_sp: | VIEW_SYM {} | VALUE_SYM {} | WARNINGS {} + | WAIT_SYM {} | WEEK_SYM {} | WORK_SYM {} | X509_SYM {} diff --git a/storage/csv/ha_tina.cc b/storage/csv/ha_tina.cc index 78ea1b9fc1d..f258b1b1f99 100644 --- a/storage/csv/ha_tina.cc +++ b/storage/csv/ha_tina.cc @@ -88,6 +88,7 @@ handlerton tina_hton= { NULL, /* Start Consistent Snapshot */ NULL, /* Flush logs */ NULL, /* Show status */ + NULL, /* Alter Tablespace */ HTON_CAN_RECREATE }; diff --git a/storage/ndb/include/kernel/AttributeHeader.hpp b/storage/ndb/include/kernel/AttributeHeader.hpp index a636499a50f..b17bb456bf0 100644 --- a/storage/ndb/include/kernel/AttributeHeader.hpp +++ b/storage/ndb/include/kernel/AttributeHeader.hpp @@ -43,6 +43,8 @@ public: STATIC_CONST( RECORDS_IN_RANGE = 0xFFF8 ); STATIC_CONST( DISK_REF = 0xFFF7 ); + STATIC_CONST( ROWID = 0xFFF6 ); + STATIC_CONST( ROW_GCI = 0xFFF5 ); // NOTE: in 5.1 ctors and init take size in bytes diff --git 
a/storage/ndb/include/kernel/GlobalSignalNumbers.h b/storage/ndb/include/kernel/GlobalSignalNumbers.h index 745e2daaca7..b05b79cf176 100644 --- a/storage/ndb/include/kernel/GlobalSignalNumbers.h +++ b/storage/ndb/include/kernel/GlobalSignalNumbers.h @@ -570,10 +570,10 @@ extern const GlobalSignalNumber NO_OF_SIGNAL_NAMES; #define GSN_EVENT_SUBSCRIBE_REQ 458 #define GSN_EVENT_SUBSCRIBE_CONF 459 #define GSN_EVENT_SUBSCRIBE_REF 460 -#define GSN_ACC_COM_BLOCK 461 -#define GSN_ACC_COM_UNBLOCK 462 -#define GSN_TUP_COM_BLOCK 463 -#define GSN_TUP_COM_UNBLOCK 464 +/* 461 unused */ +/* 462 unused */ +/* 463 unused */ +/* 464 unused */ #define GSN_DUMP_STATE_ORD 465 diff --git a/storage/ndb/include/kernel/signaldata/AccLock.hpp b/storage/ndb/include/kernel/signaldata/AccLock.hpp index 1a41b4c9334..8c9cff6f25c 100644 --- a/storage/ndb/include/kernel/signaldata/AccLock.hpp +++ b/storage/ndb/include/kernel/signaldata/AccLock.hpp @@ -24,8 +24,9 @@ * via ACCKEYCONF. */ class AccLockReq { - friend class Dbtux; friend class Dbacc; + friend class Dbtup; + friend class Dbtux; friend bool printACC_LOCKREQ(FILE *, const Uint32*, Uint32, Uint16); public: enum RequestType { // first byte diff --git a/storage/ndb/include/kernel/signaldata/AccScan.hpp b/storage/ndb/include/kernel/signaldata/AccScan.hpp index d94d4da8cca..fd1982c77af 100644 --- a/storage/ndb/include/kernel/signaldata/AccScan.hpp +++ b/storage/ndb/include/kernel/signaldata/AccScan.hpp @@ -46,7 +46,10 @@ private: Uint32 requestInfo; Uint32 transId1; Uint32 transId2; - Uint32 savePointId; + union { + Uint32 savePointId; + Uint32 gci; + }; /** * Previously there where also a scan type @@ -58,6 +61,12 @@ private: static void setLockMode(Uint32 & requestInfo, Uint32 lockMode); static void setReadCommittedFlag(Uint32 & requestInfo, Uint32 readCommitted); static void setDescendingFlag(Uint32 & requestInfo, Uint32 descending); + + static Uint32 getNoDiskScanFlag(const Uint32 & requestInfo); + static void setNoDiskScanFlag(Uint32 & 
requestInfo, Uint32 nodisk); + + static Uint32 getNRScanFlag(const Uint32 & requestInfo); + static void setNRScanFlag(Uint32 & requestInfo, Uint32 nr); }; /** @@ -66,15 +75,19 @@ private: * l = Lock Mode - 1 Bit 2 * h = Read Committed - 1 Bit 5 * z = Descending (TUX) - 1 Bit 6 + * d = No disk scan - 1 Bit 7 + * n = Node recovery scan - 1 Bit 8 * * 1111111111222222222233 * 01234567890123456789012345678901 - * l hz + * l hzdn */ #define AS_LOCK_MODE_SHIFT (2) #define AS_LOCK_MODE_MASK (1) #define AS_READ_COMMITTED_SHIFT (5) #define AS_DESCENDING_SHIFT (6) +#define AS_NO_DISK_SCAN (7) +#define AS_NR_SCAN (8) inline Uint32 @@ -115,6 +128,32 @@ AccScanReq::setDescendingFlag(UintR & requestInfo, UintR val){ requestInfo |= (val << AS_DESCENDING_SHIFT); } +inline +Uint32 +AccScanReq::getNoDiskScanFlag(const Uint32 & requestInfo){ + return (requestInfo >> AS_NO_DISK_SCAN) & 1; +} + +inline +void +AccScanReq::setNoDiskScanFlag(UintR & requestInfo, UintR val){ + ASSERT_BOOL(val, "AccScanReq::setNoDiskScanFlag"); + requestInfo |= (val << AS_NO_DISK_SCAN); +} + +inline +Uint32 +AccScanReq::getNRScanFlag(const Uint32 & requestInfo){ + return (requestInfo >> AS_NR_SCAN) & 1; +} + +inline +void +AccScanReq::setNRScanFlag(UintR & requestInfo, UintR val){ + ASSERT_BOOL(val, "AccScanReq::setNoDiskScanFlag"); + requestInfo |= (val << AS_NR_SCAN); +} + class AccScanConf { /** * Sender(s) diff --git a/storage/ndb/include/kernel/signaldata/CopyFrag.hpp b/storage/ndb/include/kernel/signaldata/CopyFrag.hpp index 67b935dda64..c92859fdcce 100644 --- a/storage/ndb/include/kernel/signaldata/CopyFrag.hpp +++ b/storage/ndb/include/kernel/signaldata/CopyFrag.hpp @@ -30,7 +30,7 @@ class CopyFragReq { */ friend class Dblqh; public: - STATIC_CONST( SignalLength = 7 ); + STATIC_CONST( SignalLength = 8 ); private: Uint32 userPtr; @@ -40,6 +40,7 @@ private: Uint32 nodeId; Uint32 schemaVersion; Uint32 distributionKey; + Uint32 gci; }; class CopyFragConf { diff --git 
a/storage/ndb/include/kernel/signaldata/CreateFilegroupImpl.hpp b/storage/ndb/include/kernel/signaldata/CreateFilegroupImpl.hpp index 2062ec3e345..cca77fec924 100644 --- a/storage/ndb/include/kernel/signaldata/CreateFilegroupImpl.hpp +++ b/storage/ndb/include/kernel/signaldata/CreateFilegroupImpl.hpp @@ -68,9 +68,9 @@ struct CreateFilegroupImplRef { enum ErrorCode { NoError = 0, - FilegroupAlreadyExists = 1, - OutOfFilegroupRecords = 2, - OutOfLogBufferMemory = 3 + FilegroupAlreadyExists = 1502, + OutOfFilegroupRecords = 1503, + OutOfLogBufferMemory = 1504 }; Uint32 senderData; @@ -155,15 +155,15 @@ struct CreateFileImplRef { enum ErrorCode { NoError = 0, - InvalidFilegroup = 1, - InvalidFilegroupVersion = 2, - FileNoAlreadyExists = 3, - OutOfFileRecords = 4, - FileError = 5, - InvalidFileMetadata = 6, - OutOfMemory = 7, - FileReadError = 8, - FilegroupNotOnline = 9 + InvalidFilegroup = 1505, + InvalidFilegroupVersion = 1506, + FileNoAlreadyExists = 1507, + OutOfFileRecords = 1508, + FileError = 1509, + InvalidFileMetadata = 1510, + OutOfMemory = 1511, + FileReadError = 1512, + FilegroupNotOnline = 1513 }; Uint32 senderData; diff --git a/storage/ndb/include/kernel/signaldata/DictTabInfo.hpp b/storage/ndb/include/kernel/signaldata/DictTabInfo.hpp index 4b5a9bef884..a46750228b6 100644 --- a/storage/ndb/include/kernel/signaldata/DictTabInfo.hpp +++ b/storage/ndb/include/kernel/signaldata/DictTabInfo.hpp @@ -122,6 +122,10 @@ public: FragmentData = 130, // CREATE_FRAGMENTATION reply TablespaceId = 131, TablespaceVersion = 132, + + RowGCIFlag = 150, + RowChecksumFlag = 151, + TableEnd = 999, AttributeName = 1000, // String, Mandatory @@ -300,6 +304,9 @@ public: Uint32 FragmentDataLen; Uint16 FragmentData[(MAX_FRAGMENT_DATA_BYTES+1)/2]; + Uint32 RowGCIFlag; + Uint32 RowChecksumFlag; + void init(); }; @@ -609,7 +616,9 @@ struct DictFilegroupInfo { LF_UndoGrowSizeHi = 2001, LF_UndoGrowSizeLo = 2002, LF_UndoGrowPattern = 2003, - LF_UndoGrowMaxSize = 2004 + LF_UndoGrowMaxSize 
= 2004, + LF_UndoFreeWordsHi = 2006, + LF_UndoFreeWordsLo = 2007 }; // FragmentType constants @@ -645,6 +654,8 @@ struct DictFilegroupInfo { GrowSpec LF_UndoGrow; }; //GrowSpec LF_RedoGrow; + Uint32 LF_UndoFreeWordsHi; + Uint32 LF_UndoFreeWordsLo; void init(); }; static const Uint32 MappingSize; diff --git a/storage/ndb/include/kernel/signaldata/DihContinueB.hpp b/storage/ndb/include/kernel/signaldata/DihContinueB.hpp index 77ecf360601..9b625def736 100644 --- a/storage/ndb/include/kernel/signaldata/DihContinueB.hpp +++ b/storage/ndb/include/kernel/signaldata/DihContinueB.hpp @@ -69,7 +69,8 @@ private: ZSEND_END_TO = 41, WAIT_DROP_TAB_WRITING_TO_FILE = 42, - CHECK_WAIT_DROP_TAB_FAILED_LQH = 43 + CHECK_WAIT_DROP_TAB_FAILED_LQH = 43, + ZTO_START_FRAGMENTS = 44 }; }; diff --git a/storage/ndb/include/kernel/signaldata/GetTabInfo.hpp b/storage/ndb/include/kernel/signaldata/GetTabInfo.hpp index 07144b672b6..1390cf4db9b 100644 --- a/storage/ndb/include/kernel/signaldata/GetTabInfo.hpp +++ b/storage/ndb/include/kernel/signaldata/GetTabInfo.hpp @@ -112,10 +112,14 @@ public: public: Uint32 senderData; Uint32 tableId; - Uint32 gci; // For table + union { + Uint32 gci; // For table + Uint32 freeWordsHi; // for logfile group m_free_file_words + }; union { Uint32 totalLen; // In words Uint32 freeExtents; + Uint32 freeWordsLo; // for logfile group m_free_file_words }; Uint32 tableType; Uint32 senderRef; diff --git a/storage/ndb/include/kernel/signaldata/LqhFrag.hpp b/storage/ndb/include/kernel/signaldata/LqhFrag.hpp index 2c4e1fc9e72..cd3f8849552 100644 --- a/storage/ndb/include/kernel/signaldata/LqhFrag.hpp +++ b/storage/ndb/include/kernel/signaldata/LqhFrag.hpp @@ -33,7 +33,7 @@ class AddFragReq { friend bool printADD_FRAG_REQ(FILE *, const Uint32 *, Uint32, Uint16); public: - STATIC_CONST( SignalLength = 9 ); + STATIC_CONST( SignalLength = 10 ); enum RequestInfo { CreateInRunning = 0x8000000, @@ -49,6 +49,7 @@ private: Uint32 nodeId; Uint32 totalFragments; Uint32 startGci; + 
Uint32 logPartId; }; class AddFragRef { @@ -104,7 +105,7 @@ class LqhFragReq { friend bool printLQH_FRAG_REQ(FILE *, const Uint32 *, Uint32, Uint16); public: - STATIC_CONST( SignalLength = 19 ); + STATIC_CONST( SignalLength = 20 ); enum RequestInfo { CreateInRunning = 0x8000000, @@ -137,6 +138,7 @@ private: Uint16 noOfKeyAttr; Uint8 checksumIndicator; Uint8 GCPIndicator; + Uint32 logPartId; }; class LqhFragConf { diff --git a/storage/ndb/include/kernel/signaldata/LqhKey.hpp b/storage/ndb/include/kernel/signaldata/LqhKey.hpp index cc9f1dacef4..fd3177e1fe5 100644 --- a/storage/ndb/include/kernel/signaldata/LqhKey.hpp +++ b/storage/ndb/include/kernel/signaldata/LqhKey.hpp @@ -127,6 +127,18 @@ private: static void setApplicationAddressFlag(UintR & requestInfo, UintR val); static void setMarkerFlag(UintR & requestInfo, UintR val); static void setNoDiskFlag(UintR & requestInfo, UintR val); + + static UintR getRowidFlag(const UintR & requestInfo); + static void setRowidFlag(UintR & requestInfo, UintR val); + + /** + * When doing DIRTY WRITES + */ + static UintR getGCIFlag(const UintR & requestInfo); + static void setGCIFlag(UintR & requestInfo, UintR val); + + static UintR getNrCopyFlag(const UintR & requestInfo); + static void setNrCopyFlag(UintR & requestInfo, UintR val); }; /** @@ -134,7 +146,9 @@ private: * * k = Key len - 10 Bits (0-9) max 1023 * l = Last Replica No - 2 Bits -> Max 3 (10-11) - * t = Lock type - 3 Bits -> Max 7 (12-14) + + IF version < NDBD_ROWID_VERSION + * t = Lock type - 3 Bits -> Max 7 (12-14) * p = Application Addr. 
Ind - 1 Bit (15) * d = Dirty indicator - 1 Bit (16) * i = Interpreted indicator - 1 Bit (17) @@ -146,11 +160,14 @@ private: * u = Read Len Return Ind - 1 Bit (28) * m = Commit ack marker - 1 Bit (29) * x = No disk usage - 1 Bit (30) - * - = Unused - 2 Bit (31) - * - * 1111111111222222222233 - * 01234567890123456789012345678901 - * kkkkkkkkkklltttpdisooorraaacumx- + * z = Use rowid for insert - 1 Bit (31) + * g = gci flag - 1 Bit (12) + * n = NR copy - 1 Bit (13) + + * 1111111111222222222233 + * 01234567890123456789012345678901 + * kkkkkkkkkklltttpdisooorraaacumxz + * kkkkkkkkkkllgn pdisooorraaacumxz */ #define RI_KEYLEN_SHIFT (0) @@ -173,6 +190,9 @@ private: #define RI_RETURN_AI_SHIFT (28) #define RI_MARKER_SHIFT (29) #define RI_NODISK_SHIFT (30) +#define RI_ROWID_SHIFT (31) +#define RI_GCI_SHIFT (12) +#define RI_NR_COPY_SHIFT (13) /** * Scan Info @@ -482,6 +502,45 @@ LqhKeyReq::getNoDiskFlag(const UintR & requestInfo){ return (requestInfo >> RI_NODISK_SHIFT) & 1; } +inline +void +LqhKeyReq::setRowidFlag(UintR & requestInfo, UintR val){ + ASSERT_BOOL(val, "LqhKeyReq::setRowidFlag"); + requestInfo |= (val << RI_ROWID_SHIFT); +} + +inline +UintR +LqhKeyReq::getRowidFlag(const UintR & requestInfo){ + return (requestInfo >> RI_ROWID_SHIFT) & 1; +} + +inline +void +LqhKeyReq::setGCIFlag(UintR & requestInfo, UintR val){ + ASSERT_BOOL(val, "LqhKeyReq::setGciFlag"); + requestInfo |= (val << RI_GCI_SHIFT); +} + +inline +UintR +LqhKeyReq::getGCIFlag(const UintR & requestInfo){ + return (requestInfo >> RI_GCI_SHIFT) & 1; +} + +inline +void +LqhKeyReq::setNrCopyFlag(UintR & requestInfo, UintR val){ + ASSERT_BOOL(val, "LqhKeyReq::setNrCopyFlag"); + requestInfo |= (val << RI_NR_COPY_SHIFT); +} + +inline +UintR +LqhKeyReq::getNrCopyFlag(const UintR & requestInfo){ + return (requestInfo >> RI_NR_COPY_SHIFT) & 1; +} + class LqhKeyConf { /** * Reciver(s) diff --git a/storage/ndb/include/kernel/signaldata/NextScan.hpp b/storage/ndb/include/kernel/signaldata/NextScan.hpp index 
a502a89108c..5fe4d512d38 100644 --- a/storage/ndb/include/kernel/signaldata/NextScan.hpp +++ b/storage/ndb/include/kernel/signaldata/NextScan.hpp @@ -33,14 +33,6 @@ public: ZSCAN_CLOSE = 6, ZSCAN_NEXT_ABORT = 12 }; - enum CopyFlag { - todo_ZCOPY_NEXT = 1, - todo_ZCOPY_NEXT_COMMIT = 2, - todo_ZCOPY_COMMIT = 3, - todo_ZCOPY_REPEAT = 4, - todo_ZCOPY_ABORT = 5, - todo_ZCOPY_CLOSE = 6 - }; STATIC_CONST( SignalLength = 3 ); private: Uint32 accPtr; // scan record in ACC/TUX @@ -62,8 +54,7 @@ private: Uint32 fragId; Uint32 localKey[2]; Uint32 localKeyLength; - Uint32 keyLength; - Uint32 key[4]; + Uint32 gci; }; #endif diff --git a/storage/ndb/include/kernel/signaldata/TupKey.hpp b/storage/ndb/include/kernel/signaldata/TupKey.hpp index 0706d057d94..42c5cc40362 100644 --- a/storage/ndb/include/kernel/signaldata/TupKey.hpp +++ b/storage/ndb/include/kernel/signaldata/TupKey.hpp @@ -36,7 +36,7 @@ class TupKeyReq { friend bool printTUPKEYREQ(FILE * output, const Uint32 * theData, Uint32 len, Uint16 receiverBlockNo); public: - STATIC_CONST( SignalLength = 16 ); + STATIC_CONST( SignalLength = 18 ); private: @@ -59,6 +59,8 @@ private: Uint32 tcOpIndex; Uint32 savePointId; Uint32 disk_page; + Uint32 m_row_id_page_no; + Uint32 m_row_id_page_idx; }; class TupKeyConf { @@ -78,7 +80,7 @@ class TupKeyConf { friend bool printTUPKEYCONF(FILE * output, const Uint32 * theData, Uint32 len, Uint16 receiverBlockNo); public: - STATIC_CONST( SignalLength = 5 ); + STATIC_CONST( SignalLength = 6 ); private: @@ -90,6 +92,7 @@ private: Uint32 writeLength; Uint32 noFiredTriggers; Uint32 lastRow; + Uint32 rowid; }; class TupKeyRef { diff --git a/storage/ndb/include/ndb_version.h.in b/storage/ndb/include/ndb_version.h.in index 38b72306d03..c953088bc07 100644 --- a/storage/ndb/include/ndb_version.h.in +++ b/storage/ndb/include/ndb_version.h.in @@ -57,6 +57,10 @@ char ndb_version_string_buf[NDB_VERSION_STRING_BUF_SZ]; */ /*#define NDB_VERSION_ID 0*/ +/** + * From which version do we support rowid + */ 
+#define NDBD_ROWID_VERSION (MAKE_VERSION(5,1,6)) #define NDBD_INCL_NODECONF_VERSION_4 MAKE_VERSION(4,1,17) #define NDBD_INCL_NODECONF_VERSION_5 MAKE_VERSION(5,0,18) diff --git a/storage/ndb/include/ndbapi/NdbDictionary.hpp b/storage/ndb/include/ndbapi/NdbDictionary.hpp index 954dcc62a22..7a025f4611a 100644 --- a/storage/ndb/include/ndbapi/NdbDictionary.hpp +++ b/storage/ndb/include/ndbapi/NdbDictionary.hpp @@ -93,7 +93,9 @@ public: * Get version of object */ virtual int getObjectVersion() const = 0; - + + virtual int getObjectId() const = 0; + /** * Object type */ @@ -501,6 +503,8 @@ public: static const Column * RANGE_NO; static const Column * DISK_REF; static const Column * RECORDS_IN_RANGE; + static const Column * ROWID; + static const Column * ROW_GCI; int getSizeInBytes() const; #endif @@ -751,6 +755,7 @@ public: void setTablespace(const char * name); void setTablespace(const class Tablespace &); const char * getTablespace() const; + Uint32 getTablespaceId() const; /** * Get table object type @@ -768,6 +773,11 @@ public: virtual int getObjectVersion() const; /** + * Get object id + */ + virtual int getObjectId() const; + + /** * Set frm file to store with this table */ void setFrm(const void* data, Uint32 len); @@ -784,6 +794,15 @@ public: /** @} *******************************************************************/ + /** + * + */ + void setRowGCIIndicator(bool value); + bool getRowGCIIndicator() const; + + void setRowChecksumIndicator(bool value); + bool getRowChecksumIndicator() const; + #ifndef DOXYGEN_SHOULD_SKIP_INTERNAL const char *getMysqlName() const; @@ -887,6 +906,11 @@ public: */ virtual int getObjectVersion() const; + /** + * Get object id + */ + virtual int getObjectId() const; + /** @} *******************************************************************/ /** @@ -1157,6 +1181,11 @@ public: */ virtual int getObjectVersion() const; + /** + * Get object id + */ + virtual int getObjectId() const; + #ifndef DOXYGEN_SHOULD_SKIP_INTERNAL void print(); 
#endif @@ -1183,6 +1212,7 @@ public: class LogfileGroup : public Object { public: LogfileGroup(); + LogfileGroup(const LogfileGroup&); virtual ~LogfileGroup(); void setName(const char * name); @@ -1194,6 +1224,8 @@ public: void setAutoGrowSpecification(const AutoGrowSpecification&); const AutoGrowSpecification& getAutoGrowSpecification() const; + Uint64 getUndoFreeWords() const; + /** * Get object status */ @@ -1204,6 +1236,11 @@ public: */ virtual int getObjectVersion() const; + /** + * Get object id + */ + virtual int getObjectId() const; + private: friend class NdbDictionaryImpl; friend class NdbLogfileGroupImpl; @@ -1217,6 +1254,7 @@ public: class Tablespace : public Object { public: Tablespace(); + Tablespace(const Tablespace&); virtual ~Tablespace(); void setName(const char * name); @@ -1230,7 +1268,9 @@ public: void setDefaultLogfileGroup(const char * name); void setDefaultLogfileGroup(const class LogfileGroup&); + const char * getDefaultLogfileGroup() const; + Uint32 getDefaultLogfileGroupId() const; /** * Get object status @@ -1242,6 +1282,11 @@ public: */ virtual int getObjectVersion() const; + /** + * Get object id + */ + virtual int getObjectId() const; + private: friend class NdbTablespaceImpl; class NdbTablespaceImpl & m_impl; @@ -1251,6 +1296,7 @@ public: class Datafile : public Object { public: Datafile(); + Datafile(const Datafile&); virtual ~Datafile(); void setPath(const char * name); @@ -1263,6 +1309,7 @@ public: void setTablespace(const char * name); void setTablespace(const class Tablespace &); const char * getTablespace() const; + Uint32 getTablespaceId() const; void setNode(Uint32 nodeId); Uint32 getNode() const; @@ -1279,6 +1326,11 @@ public: */ virtual int getObjectVersion() const; + /** + * Get object id + */ + virtual int getObjectId() const; + private: friend class NdbDatafileImpl; class NdbDatafileImpl & m_impl; @@ -1288,6 +1340,7 @@ public: class Undofile : public Object { public: Undofile(); + Undofile(const Undofile&); virtual 
~Undofile(); void setPath(const char * path); @@ -1295,11 +1348,11 @@ public: void setSize(Uint64); Uint64 getSize() const; - Uint64 getFree() const; void setLogfileGroup(const char * name); void setLogfileGroup(const class LogfileGroup &); const char * getLogfileGroup() const; + Uint32 getLogfileGroupId() const; void setNode(Uint32 nodeId); Uint32 getNode() const; @@ -1316,6 +1369,11 @@ public: */ virtual int getObjectVersion() const; + /** + * Get object id + */ + virtual int getObjectId() const; + private: friend class NdbUndofileImpl; class NdbUndofileImpl & m_impl; diff --git a/storage/ndb/include/ndbapi/ndb_cluster_connection.hpp b/storage/ndb/include/ndbapi/ndb_cluster_connection.hpp index bf7f14a5d44..d5c051b7d2a 100644 --- a/storage/ndb/include/ndbapi/ndb_cluster_connection.hpp +++ b/storage/ndb/include/ndbapi/ndb_cluster_connection.hpp @@ -18,6 +18,19 @@ #ifndef CLUSTER_CONNECTION_HPP #define CLUSTER_CONNECTION_HPP +class Ndb_cluster_connection_node_iter +{ + friend class Ndb_cluster_connection_impl; +public: + Ndb_cluster_connection_node_iter() : scan_state(~0), + init_pos(0), + cur_pos(0) {}; +private: + unsigned char scan_state; + unsigned char init_pos; + unsigned char cur_pos; +}; + /** * @class Ndb_cluster_connection * @brief Represents a connection to a cluster of storage nodes. 
@@ -88,6 +101,9 @@ public: unsigned no_db_nodes(); unsigned node_id(); + + void init_get_next_node(Ndb_cluster_connection_node_iter &iter); + unsigned int get_next_node(Ndb_cluster_connection_node_iter &iter); #endif private: diff --git a/storage/ndb/include/util/Bitmask.hpp b/storage/ndb/include/util/Bitmask.hpp index 1ec956daac2..b586c95935e 100644 --- a/storage/ndb/include/util/Bitmask.hpp +++ b/storage/ndb/include/util/Bitmask.hpp @@ -48,6 +48,11 @@ public: static void set(unsigned size, Uint32 data[]); /** + * set bit from <em>start</em> to <em>last</em> + */ + static void set_range(unsigned size, Uint32 data[], unsigned start, unsigned last); + + /** * assign - Set all bits in <em>dst</em> to corresponding in <em>src/<em> */ static void assign(unsigned size, Uint32 dst[], const Uint32 src[]); @@ -62,6 +67,11 @@ public: */ static void clear(unsigned size, Uint32 data[]); + /** + * clear bit from <em>start</em> to <em>last</em> + */ + static void clear_range(unsigned size, Uint32 data[], unsigned start, unsigned last); + static Uint32 getWord(unsigned size, Uint32 data[], unsigned word_pos); static void setWord(unsigned size, Uint32 data[], unsigned word_pos, Uint32 new_word); @@ -184,6 +194,34 @@ BitmaskImpl::set(unsigned size, Uint32 data[]) } } +inline void +BitmaskImpl::set_range(unsigned size, Uint32 data[], + unsigned start, unsigned last) +{ + Uint32 *ptr = data + (start >> 5); + Uint32 *end = data + (last >> 5); + assert(start <= last); + assert(last < (size << 5)); + + Uint32 tmp_word = ~(Uint32)0 << (start & 31); + + if (ptr < end) + { + * ptr ++ |= tmp_word; + + for(; ptr < end; ) + { + * ptr ++ = ~(Uint32)0; + } + + tmp_word = ~(Uint32)0; + } + + tmp_word &= ~(~(Uint32)0 << (last & 31)); + + * ptr |= tmp_word; +} + inline void BitmaskImpl::assign(unsigned size, Uint32 dst[], const Uint32 src[]) { @@ -207,6 +245,34 @@ BitmaskImpl::clear(unsigned size, Uint32 data[]) } } +inline void +BitmaskImpl::clear_range(unsigned size, Uint32 data[], + unsigned 
start, unsigned last) +{ + Uint32 *ptr = data + (start >> 5); + Uint32 *end = data + (last >> 5); + assert(start <= last); + assert(last < (size << 5)); + + Uint32 tmp_word = ~(Uint32)0 << (start & 31); + + if (ptr < end) + { + * ptr ++ &= ~tmp_word; + + for(; ptr < end; ) + { + * ptr ++ = 0; + } + + tmp_word = ~(Uint32)0; + } + + tmp_word &= ~(~(Uint32)0 << (last & 31)); + + * ptr &= ~tmp_word; +} + inline Uint32 BitmaskImpl::getWord(unsigned size, Uint32 data[], unsigned word_pos) diff --git a/storage/ndb/include/util/Vector.hpp b/storage/ndb/include/util/Vector.hpp index 480dddf8243..cd01d914741 100644 --- a/storage/ndb/include/util/Vector.hpp +++ b/storage/ndb/include/util/Vector.hpp @@ -31,6 +31,8 @@ public: unsigned size() const { return m_size; }; void push_back(const T &); + void push(const T&, unsigned pos); + T& set(T&, unsigned pos, T& fill_obj); T& back(); void erase(unsigned index); @@ -106,6 +108,31 @@ Vector<T>::push_back(const T & t){ template<class T> void +Vector<T>::push(const T & t, unsigned pos) +{ + push_back(t); + if (pos < m_size - 1) + { + for(unsigned i = m_size - 1; i > pos; i--) + { + m_items[i] = m_items[i-1]; + } + m_items[pos] = t; + } +} + +template<class T> +T& +Vector<T>::set(T & t, unsigned pos, T& fill_obj) +{ + fill(pos, fill_obj); + T& ret = m_items[pos]; + m_items[pos] = t; + return ret; +} + +template<class T> +void Vector<T>::erase(unsigned i){ if(i >= m_size) abort(); diff --git a/storage/ndb/src/common/debugger/signaldata/DictTabInfo.cpp b/storage/ndb/src/common/debugger/signaldata/DictTabInfo.cpp index 6212f91290b..885c2a03d93 100644 --- a/storage/ndb/src/common/debugger/signaldata/DictTabInfo.cpp +++ b/storage/ndb/src/common/debugger/signaldata/DictTabInfo.cpp @@ -49,6 +49,8 @@ DictTabInfo::TableMapping[] = { DTIMAPB(Table, FragmentData, FragmentData, 0, MAX_FRAGMENT_DATA_BYTES, FragmentDataLen), DTIMAP(Table, TablespaceId, TablespaceId), DTIMAP(Table, TablespaceVersion, TablespaceVersion), + DTIMAP(Table, RowGCIFlag, 
RowGCIFlag), + DTIMAP(Table, RowChecksumFlag, RowChecksumFlag), DTIBREAK(AttributeName) }; @@ -128,6 +130,9 @@ DictTabInfo::Table::init(){ memset(FragmentData, 0, sizeof(FragmentData)); TablespaceId = RNIL; TablespaceVersion = ~0; + + RowGCIFlag = ~0; + RowChecksumFlag = ~0; } void @@ -174,7 +179,9 @@ DictFilegroupInfo::Mapping[] = { DFGIMAP(Filegroup, LF_UndoGrowSizeLo, LF_UndoGrow.GrowSizeLo), DFGIMAPS(Filegroup, LF_UndoGrowPattern, LF_UndoGrow.GrowPattern, 0,PATH_MAX), DFGIMAP(Filegroup, LF_UndoGrowMaxSize, LF_UndoGrow.GrowMaxSize), - + DFGIMAP(Filegroup, LF_UndoFreeWordsHi, LF_UndoFreeWordsHi), + DFGIMAP(Filegroup, LF_UndoFreeWordsLo, LF_UndoFreeWordsLo), + DFGIBREAK(FileName) }; diff --git a/storage/ndb/src/common/debugger/signaldata/FsCloseReq.cpp b/storage/ndb/src/common/debugger/signaldata/FsCloseReq.cpp index df9f3cc9fbc..ff05e1d797b 100644 --- a/storage/ndb/src/common/debugger/signaldata/FsCloseReq.cpp +++ b/storage/ndb/src/common/debugger/signaldata/FsCloseReq.cpp @@ -36,5 +36,6 @@ printFSCLOSEREQ(FILE * output, const Uint32 * theData, Uint32 len, Uint16 receiv else fprintf(output, "Don't remove file"); fprintf(output, "\n"); - return true; + + return len == 4; } diff --git a/storage/ndb/src/common/debugger/signaldata/LqhKey.cpp b/storage/ndb/src/common/debugger/signaldata/LqhKey.cpp index 95a571a08ed..2db543ae905 100644 --- a/storage/ndb/src/common/debugger/signaldata/LqhKey.cpp +++ b/storage/ndb/src/common/debugger/signaldata/LqhKey.cpp @@ -51,6 +51,12 @@ printLQHKEYREQ(FILE * output, const Uint32 * theData, Uint32 len, Uint16 receive fprintf(output, "CommitAckMarker "); if(LqhKeyReq::getNoDiskFlag(reqInfo)) fprintf(output, "NoDisk "); + if(LqhKeyReq::getRowidFlag(reqInfo)) + fprintf(output, "Rowid "); + if(LqhKeyReq::getNrCopyFlag(reqInfo)) + fprintf(output, "NrCopy "); + if(LqhKeyReq::getGCIFlag(reqInfo)) + fprintf(output, "GCI "); fprintf(output, "ScanInfo/noFiredTriggers: H\'%x\n", sig->scanInfo); @@ -119,6 +125,20 @@ printLQHKEYREQ(FILE * output, 
const Uint32 * theData, Uint32 len, Uint16 receive fprintf(output, "H\'%.8x ", sig->variableData[nextPos]); fprintf(output, "\n"); } + + if (LqhKeyReq::getRowidFlag(reqInfo)) + { + fprintf(output, " Rowid: [ page: %d idx: %d ]\n", + sig->variableData[nextPos + 0], + sig->variableData[nextPos + 1]); + nextPos += 2; + } + + if (LqhKeyReq::getGCIFlag(reqInfo)) + { + fprintf(output, " GCI: %u", sig->variableData[nextPos + 0]); + nextPos++; + } if(!LqhKeyReq::getInterpretedFlag(reqInfo)){ fprintf(output, " AttrInfo: "); diff --git a/storage/ndb/src/common/debugger/signaldata/SignalNames.cpp b/storage/ndb/src/common/debugger/signaldata/SignalNames.cpp index ca2821d8448..69077a4e733 100644 --- a/storage/ndb/src/common/debugger/signaldata/SignalNames.cpp +++ b/storage/ndb/src/common/debugger/signaldata/SignalNames.cpp @@ -363,10 +363,6 @@ const GsnName SignalNames [] = { ,{ GSN_EVENT_SUBSCRIBE_REQ, "EVENT_SUBSCRIBE_REQ" } ,{ GSN_EVENT_SUBSCRIBE_CONF, "EVENT_SUBSCRIBE_CONF" } ,{ GSN_EVENT_SUBSCRIBE_REF, "EVENT_SUBSCRIBE_REF" } - ,{ GSN_ACC_COM_BLOCK, "ACC_COM_BLOCK" } - ,{ GSN_ACC_COM_UNBLOCK, "ACC_COM_UNBLOCK" } - ,{ GSN_TUP_COM_BLOCK, "TUP_COM_BLOCK" } - ,{ GSN_TUP_COM_UNBLOCK, "TUP_COM_UNBLOCK" } ,{ GSN_DUMP_STATE_ORD, "DUMP_STATE_ORD" } ,{ GSN_START_INFOREQ, "START_INFOREQ" } diff --git a/storage/ndb/src/common/util/Bitmask.cpp b/storage/ndb/src/common/util/Bitmask.cpp index 0aa39a37204..49595106cbb 100644 --- a/storage/ndb/src/common/util/Bitmask.cpp +++ b/storage/ndb/src/common/util/Bitmask.cpp @@ -16,8 +16,6 @@ void print(const Uint32 src[], Uint32 len, Uint32 pos = 0) } } -#ifndef __TEST_BITMASK__ - void BitmaskImpl::getFieldImpl(const Uint32 src[], unsigned shiftL, unsigned len, Uint32 dst[]) @@ -78,7 +76,7 @@ BitmaskImpl::setFieldImpl(Uint32 dst[], * dst |= ((* src) & ((1 << (len - shiftR)) - 1)) << shiftR ; } } -#else +#ifdef __TEST_BITMASK__ #define DEBUG 0 #include <Vector.hpp> @@ -342,6 +340,74 @@ do_test(int bitmask_size) alloc_list.push_back(a); } } + + 
for(Uint32 i = 0; i<1000; i++) + { + Uint32 sz32 = 10+rand() % 100; + Uint32 zero = 0; + Vector<Uint32> map; + map.fill(sz32, zero); + + Uint32 sz = 32 * sz32; + Uint32 start = (rand() % sz); + Uint32 stop = start + ((rand() % (sz - start)) & 0xFFFFFFFF); + + Vector<Uint32> check; + check.fill(sz32, zero); + + for(Uint32 j = 0; j<sz; j++) + { + bool expect = (j >= start && j<stop); + if(expect) + BitmaskImpl::set(sz32, check.getBase(), j); + } + + BitmaskImpl::set(sz32, map.getBase(), start, stop); + if (!BitmaskImpl::equal(sz32, map.getBase(), check.getBase())) + { + ndbout_c(" FAIL sz: %d [ %d %d ]", sz, start, stop); + printf("check: "); + for(Uint32 j = 0; j<sz32; j++) + printf("%.8x ", check[j]); + printf("\n"); + + printf("map : "); + for(Uint32 j = 0; j<sz32; j++) + printf("%.8x ", map[j]); + printf("\n"); + abort(); + } + + map.clear(); + check.clear(); + + Uint32 one = ~(Uint32)0; + map.fill(sz32, one); + check.fill(sz32, one); + + for(Uint32 j = 0; j<sz; j++) + { + bool expect = (j >= start && j<stop); + if(expect) + BitmaskImpl::clear(sz32, check.getBase(), j); + } + + BitmaskImpl::clear(sz32, map.getBase(), start, stop); + if (!BitmaskImpl::equal(sz32, map.getBase(), check.getBase())) + { + ndbout_c(" FAIL sz: %d [ %d %d ]", sz, start, stop); + printf("check: "); + for(Uint32 j = 0; j<sz32; j++) + printf("%.8x ", check[j]); + printf("\n"); + + printf("map : "); + for(Uint32 j = 0; j<sz32; j++) + printf("%.8x ", map[j]); + printf("\n"); + abort(); + } + } #endif } diff --git a/storage/ndb/src/kernel/blocks/ERROR_codes.txt b/storage/ndb/src/kernel/blocks/ERROR_codes.txt index 0be5e91cd71..d9d52e43779 100644 --- a/storage/ndb/src/kernel/blocks/ERROR_codes.txt +++ b/storage/ndb/src/kernel/blocks/ERROR_codes.txt @@ -3,7 +3,7 @@ Next NDBCNTR 1000 Next NDBFS 2000 Next DBACC 3002 Next DBTUP 4013 -Next DBLQH 5042 +Next DBLQH 5043 Next DBDICT 6007 Next DBDIH 7174 Next DBTC 8037 @@ -314,6 +314,8 @@ LQH: 5026 Crash when receiving COPY_ACTIVEREQ 5027 Crash when 
receiving STAT_RECREQ +5042 Crash starting node, when scan is finished on primary replica + Test Crashes in handling take over ---------------------------------- @@ -461,3 +463,13 @@ Dbdict: 6003 Crash in participant @ CreateTabReq::Prepare 6004 Crash in participant @ CreateTabReq::Commit 6005 Crash in participant @ CreateTabReq::CreateDrop + +Dbtup: +4014 - handleInsert - Out of undo buffer +4015 - handleInsert - Out of log space +4016 - handleInsert - AI Inconsistency +4017 - handleInsert - Out of memory +4018 - handleInsert - Null check error +4019 - handleInsert - Alloc rowid error +4020 - handleInsert - Size change error +4021 - handleInsert - Out of disk space diff --git a/storage/ndb/src/kernel/blocks/OptNR.txt b/storage/ndb/src/kernel/blocks/OptNR.txt new file mode 100644 index 00000000000..17e9a62bb0e --- /dev/null +++ b/storage/ndb/src/kernel/blocks/OptNR.txt @@ -0,0 +1,49 @@ +*** Copy thread + + Scan rowids with GCP > starting nodes GCP + Cases for different ROWID combinations + +RI Primary Starting Result +1 A A Update A +2 B B* Delete B* + Insert B +3 C C* Delete C* + Delete C + Insert C + C +4 Deleted D Delete D +5 E Deleted Insert E +6 F Deleted Delete F + Insert F + F +7 Deleted Deleted Update GCP + +*** Ordinary operations +Op Starting Result +Insert A@1 A@1 Update A +Insert A@1 A@2 Delete A@2, Insert A@1 +Insert A@1 1 busy, A@2 Delete 1, Delete A@2, Insert A@1 +Insert A@1 1 busy Delete 1, Insert A@1 + +Delete A@1 A@1 Delete A@1 +Delete A@1 else noop + +Update A@1 A@1 Update A +Update A@1 else noop + +*** + +Rationale: + +If copy has passed rowid, + then no ordinary operation should be a noop + +If copy has not passed, + then it's ok to do a noop as copy will get there sooner or later + +Copy may not end up in lock queue, as no lock is held on primary. + therefore ordinary ops must be noops when rowid missmatch + +When not scanning in rowid order (e.g. 
disk order) one must +1 make a second pass in rowid order + - finding deletes and inserts (as 2) +2 mark all inserts "earlier" than current scan pos + so they will be found during second pass + +Note: Dealloc is performed first on backup then on primary diff --git a/storage/ndb/src/kernel/blocks/backup/Backup.cpp b/storage/ndb/src/kernel/blocks/backup/Backup.cpp index 716173be55d..04aee6cc34e 100644 --- a/storage/ndb/src/kernel/blocks/backup/Backup.cpp +++ b/storage/ndb/src/kernel/blocks/backup/Backup.cpp @@ -14,6 +14,7 @@ along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ +#include <my_config.h> #include "Backup.hpp" #include <ndb_version.h> @@ -2514,15 +2515,22 @@ Backup::execLIST_TABLES_CONF(Signal* signal) Uint32 tableId = ListTablesConf::getTableId(conf->tableData[i]); Uint32 tableType = ListTablesConf::getTableType(conf->tableData[i]); Uint32 state= ListTablesConf::getTableState(conf->tableData[i]); - if (!DictTabInfo::isTable(tableType) && !DictTabInfo::isIndex(tableType)){ + + if (! 
(DictTabInfo::isTable(tableType) || + DictTabInfo::isIndex(tableType) || + DictTabInfo::isFilegroup(tableType) || + DictTabInfo::isFile(tableType))) + { jam(); continue; - }//if + } + if (state != DictTabInfo::StateOnline) { jam(); continue; - }//if + } + TablePtr tabPtr; ptr.p->tables.seize(tabPtr); if(tabPtr.i == RNIL) { @@ -2832,6 +2840,8 @@ Backup::execGET_TABINFO_CONF(Signal* signal) //const Uint32 senderRef = info->senderRef; const Uint32 len = conf->totalLen; const Uint32 senderData = conf->senderData; + const Uint32 tableType = conf->tableType; + const Uint32 tableId = conf->tableId; BackupRecordPtr ptr; c_backupPool.getPtr(ptr, senderData); @@ -2840,6 +2850,9 @@ Backup::execGET_TABINFO_CONF(Signal* signal) signal->getSection(dictTabInfoPtr, GetTabInfoConf::DICT_TAB_INFO); ndbrequire(dictTabInfoPtr.sz == len); + TablePtr tabPtr ; + ndbrequire(findTable(ptr, tabPtr, tableId)); + /** * No of pages needed */ @@ -2860,7 +2873,7 @@ Backup::execGET_TABINFO_CONF(Signal* signal) ptr.p->files.getPtr(filePtr, ptr.p->ctlFilePtr); FsBuffer & buf = filePtr.p->operation.dataBuffer; { // Write into ctl file - Uint32* dst, dstLen = len + 2; + Uint32* dst, dstLen = len + 3; if(!buf.getWritePtr(&dst, dstLen)) { jam(); ndbrequire(false); @@ -2875,49 +2888,62 @@ Backup::execGET_TABINFO_CONF(Signal* signal) BackupFormat::CtlFile::TableDescription * desc = (BackupFormat::CtlFile::TableDescription*)dst; desc->SectionType = htonl(BackupFormat::TABLE_DESCRIPTION); - desc->SectionLength = htonl(len + 2); - dst += 2; - + desc->SectionLength = htonl(len + 3); + desc->TableType = htonl(tableType); + dst += 3; + copy(dst, dictTabInfoPtr); buf.updateWritePtr(dstLen); }//if } - - ndbrequire(ptr.p->pages.getSize() >= noPages); - Page32Ptr pagePtr; - ptr.p->pages.getPtr(pagePtr, 0); - copy(&pagePtr.p->data[0], dictTabInfoPtr); - releaseSections(signal); - + if(ptr.p->checkError()) { jam(); + releaseSections(signal); defineBackupRef(signal, ptr); return; }//if - TablePtr tabPtr = 
parseTableDescription(signal, ptr, len); - if(tabPtr.i == RNIL) { + if (!DictTabInfo::isTable(tabPtr.p->tableType)) + { jam(); - defineBackupRef(signal, ptr); - return; - }//if + releaseSections(signal); - TablePtr tmp = tabPtr; - ptr.p->tables.next(tabPtr); - if(DictTabInfo::isIndex(tmp.p->tableType)) + TablePtr tmp = tabPtr; + ptr.p->tables.next(tabPtr); + ptr.p->tables.release(tmp); + goto next; + } + + ndbrequire(ptr.p->pages.getSize() >= noPages); + Page32Ptr pagePtr; + ptr.p->pages.getPtr(pagePtr, 0); + copy(&pagePtr.p->data[0], dictTabInfoPtr); + releaseSections(signal); + + if (!parseTableDescription(signal, ptr, tabPtr, len)) { jam(); - ptr.p->tables.release(tmp); + defineBackupRef(signal, ptr); + return; } - else if(!ptr.p->is_lcp()) + + if(!ptr.p->is_lcp()) { jam(); - signal->theData[0] = tmp.p->tableId; + signal->theData[0] = tabPtr.p->tableId; signal->theData[1] = 1; // lock EXECUTE_DIRECT(DBDICT, GSN_BACKUP_FRAGMENT_REQ, signal, 2); } - if(tabPtr.i == RNIL) { + ptr.p->tables.next(tabPtr); + +next: + if(tabPtr.i == RNIL) + { + /** + * Done with all tables... + */ jam(); ptr.p->pages.release(); @@ -2936,6 +2962,9 @@ Backup::execGET_TABINFO_CONF(Signal* signal) return; }//if + /** + * Fetch next table... 
+ */ signal->theData[0] = BackupContinueB::BUFFER_FULL_META; signal->theData[1] = ptr.i; signal->theData[2] = tabPtr.i; @@ -2943,8 +2972,11 @@ Backup::execGET_TABINFO_CONF(Signal* signal) return; } -Backup::TablePtr -Backup::parseTableDescription(Signal* signal, BackupRecordPtr ptr, Uint32 len) +bool +Backup::parseTableDescription(Signal* signal, + BackupRecordPtr ptr, + TablePtr tabPtr, + Uint32 len) { Page32Ptr pagePtr; @@ -2961,18 +2993,15 @@ Backup::parseTableDescription(Signal* signal, BackupRecordPtr ptr, Uint32 len) DictTabInfo::TableMappingSize, true, true); ndbrequire(stat == SimpleProperties::Break); - - TablePtr tabPtr; - ndbrequire(findTable(ptr, tabPtr, tmpTab.TableId)); - if(DictTabInfo::isIndex(tabPtr.p->tableType)){ - jam(); - return tabPtr; - } + bool lcp = ptr.p->is_lcp(); + + ndbrequire(tabPtr.p->tableId == tmpTab.TableId); + ndbrequire(lcp || (tabPtr.p->tableType == tmpTab.TableType)); + /** * LCP should not save disk attributes but only mem attributes */ - bool lcp = ptr.p->is_lcp(); /** * Initialize table object @@ -3017,8 +3046,7 @@ Backup::parseTableDescription(Signal* signal, BackupRecordPtr ptr, Uint32 len) { jam(); ptr.p->setErrorCode(DefineBackupRef::FailedToAllocateAttributeRecord); - tabPtr.i = RNIL; - return tabPtr; + return false; } attrPtr.p->data.m_flags = 0; @@ -3045,26 +3073,58 @@ Backup::parseTableDescription(Signal* signal, BackupRecordPtr ptr, Uint32 len) } }//for - if(lcp && disk) + + if(lcp) { - /** - * Remove all disk attributes, but add DISK_REF (8 bytes) - */ - tabPtr.p->noOfAttributes -= (disk - 1); - - AttributePtr attrPtr; - ndbrequire(tabPtr.p->attributes.seize(attrPtr)); - - Uint32 sz32 = 2; - attrPtr.p->data.m_flags = 0; - attrPtr.p->data.attrId = AttributeHeader::DISK_REF; - attrPtr.p->data.m_flags = Attribute::COL_FIXED; - attrPtr.p->data.sz32 = sz32; - - attrPtr.p->data.offset = tabPtr.p->sz_FixedAttributes; - tabPtr.p->sz_FixedAttributes += sz32; + if (disk) + { + /** + * Remove all disk attributes, but add 
DISK_REF (8 bytes) + */ + tabPtr.p->noOfAttributes -= (disk - 1); + + AttributePtr attrPtr; + ndbrequire(tabPtr.p->attributes.seize(attrPtr)); + + Uint32 sz32 = 2; + attrPtr.p->data.attrId = AttributeHeader::DISK_REF; + attrPtr.p->data.m_flags = Attribute::COL_FIXED; + attrPtr.p->data.sz32 = 2; + + attrPtr.p->data.offset = tabPtr.p->sz_FixedAttributes; + tabPtr.p->sz_FixedAttributes += sz32; + } + + { + AttributePtr attrPtr; + ndbrequire(tabPtr.p->attributes.seize(attrPtr)); + + Uint32 sz32 = 2; + attrPtr.p->data.attrId = AttributeHeader::ROWID; + attrPtr.p->data.m_flags = Attribute::COL_FIXED; + attrPtr.p->data.sz32 = 2; + + attrPtr.p->data.offset = tabPtr.p->sz_FixedAttributes; + tabPtr.p->sz_FixedAttributes += sz32; + tabPtr.p->noOfAttributes ++; + } + + if (tmpTab.RowGCIFlag) + { + AttributePtr attrPtr; + ndbrequire(tabPtr.p->attributes.seize(attrPtr)); + + Uint32 sz32 = 2; + attrPtr.p->data.attrId = AttributeHeader::ROW_GCI; + attrPtr.p->data.m_flags = Attribute::COL_FIXED; + attrPtr.p->data.sz32 = 2; + + attrPtr.p->data.offset = tabPtr.p->sz_FixedAttributes; + tabPtr.p->sz_FixedAttributes += sz32; + tabPtr.p->noOfAttributes ++; + } } - return tabPtr; + return true; } void diff --git a/storage/ndb/src/kernel/blocks/backup/Backup.hpp b/storage/ndb/src/kernel/blocks/backup/Backup.hpp index 6d9e0dc5aed..2144ddeac11 100644 --- a/storage/ndb/src/kernel/blocks/backup/Backup.hpp +++ b/storage/ndb/src/kernel/blocks/backup/Backup.hpp @@ -607,7 +607,7 @@ public: NodeId getMasterNodeId() const { return c_masterNodeId; } bool findTable(const BackupRecordPtr &, TablePtr &, Uint32 tableId) const; - TablePtr parseTableDescription(Signal*, BackupRecordPtr ptr, Uint32 len); + bool parseTableDescription(Signal*, BackupRecordPtr ptr, TablePtr, Uint32); bool insertFileHeader(BackupFormat::FileType, BackupRecord*, BackupFile*); void sendBackupRef(Signal* signal, BackupRecordPtr ptr, Uint32 errorCode); diff --git a/storage/ndb/src/kernel/blocks/backup/BackupFormat.hpp 
b/storage/ndb/src/kernel/blocks/backup/BackupFormat.hpp index 60f2edd6bed..76c1f1aedad 100644 --- a/storage/ndb/src/kernel/blocks/backup/BackupFormat.hpp +++ b/storage/ndb/src/kernel/blocks/backup/BackupFormat.hpp @@ -115,7 +115,8 @@ struct BackupFormat { struct TableDescription { Uint32 SectionType; Uint32 SectionLength; - Uint32 DictTabInfo[1]; // Length = SectionLength - 2 + Uint32 TableType; + Uint32 DictTabInfo[1]; // Length = SectionLength - 3 }; /** diff --git a/storage/ndb/src/kernel/blocks/dbacc/Dbacc.hpp b/storage/ndb/src/kernel/blocks/dbacc/Dbacc.hpp index a0e96a4515d..2794990d608 100644 --- a/storage/ndb/src/kernel/blocks/dbacc/Dbacc.hpp +++ b/storage/ndb/src/kernel/blocks/dbacc/Dbacc.hpp @@ -102,12 +102,6 @@ ndbout << "Ptr: " << ptr.p->word32 << " \tIndex: " << tmp_string << " \tValue: " #define ZDEFAULT_LIST 3 #define ZWORDS_IN_PAGE 2048 #define ZADDFRAG 0 -#define ZCOPY_NEXT 1 -#define ZCOPY_NEXT_COMMIT 2 -#define ZCOPY_COMMIT 3 -#define ZCOPY_REPEAT 4 -#define ZCOPY_ABORT 5 -#define ZCOPY_CLOSE 6 #define ZDIRARRAY 68 #define ZDIRRANGESIZE 65 //#define ZEMPTY_FRAGMENT 0 @@ -740,7 +734,7 @@ private: void releaseRightlist(Signal* signal); void checkoverfreelist(Signal* signal); void abortOperation(Signal* signal); - void accAbortReqLab(Signal* signal, bool sendConf); + void accAbortReqLab(Signal* signal); void commitOperation(Signal* signal); void copyOpInfo(Signal* signal); Uint32 executeNextOperation(Signal* signal); diff --git a/storage/ndb/src/kernel/blocks/dbacc/DbaccMain.cpp b/storage/ndb/src/kernel/blocks/dbacc/DbaccMain.cpp index 391405795e6..e146d2cc74b 100644 --- a/storage/ndb/src/kernel/blocks/dbacc/DbaccMain.cpp +++ b/storage/ndb/src/kernel/blocks/dbacc/DbaccMain.cpp @@ -21,6 +21,7 @@ #include <AttributeHeader.hpp> #include <signaldata/AccFrag.hpp> #include <signaldata/AccScan.hpp> +#include <signaldata/NextScan.hpp> #include <signaldata/AccLock.hpp> #include <signaldata/EventReport.hpp> #include <signaldata/FsConf.hpp> @@ -1072,7 +1073,7 
@@ void Dbacc::execACCKEYREQ(Signal* signal) initOpRec(signal); // normalize key if any char attr - if (! operationRecPtr.p->isAccLockReq && fragrecptr.p->hasCharAttr) + if (operationRecPtr.p->tupkeylen && fragrecptr.p->hasCharAttr) xfrmKeyData(signal); /*---------------------------------------------------------------*/ @@ -1778,12 +1779,13 @@ void Dbacc::execACC_COMMITREQ(Signal* signal) void Dbacc::execACC_ABORTREQ(Signal* signal) { jamEntry(); - accAbortReqLab(signal, true); + accAbortReqLab(signal); }//Dbacc::execACC_ABORTREQ() -void Dbacc::accAbortReqLab(Signal* signal, bool sendConf) +void Dbacc::accAbortReqLab(Signal* signal) { operationRecPtr.i = signal->theData[0]; + bool sendConf = signal->theData[1]; ptrCheckGuard(operationRecPtr, coprecsize, operationrec); tresult = 0; /* ZFALSE */ if ((operationRecPtr.p->transactionstate == ACTIVE) || @@ -1847,6 +1849,7 @@ void Dbacc::execACC_LOCKREQ(Signal* signal) operationRecPtr.p->userblockref = req->userRef; operationRecPtr.p->operation = ZUNDEFINED_OP; operationRecPtr.p->transactionstate = IDLE; + operationRecPtr.p->scanRecPtr = RNIL; // do read with lock via ACCKEYREQ Uint32 lockMode = (lockOp == AccLockReq::LockShared) ? 
0 : 1; Uint32 opCode = ZSCAN_OP; @@ -1854,7 +1857,7 @@ void Dbacc::execACC_LOCKREQ(Signal* signal) signal->theData[1] = fragrecptr.i; signal->theData[2] = opCode | (lockMode << 4) | (1u << 31); signal->theData[3] = req->hashValue; - signal->theData[4] = 1; // fake primKeyLen + signal->theData[4] = 0; // search local key signal->theData[5] = req->transId1; signal->theData[6] = req->transId2; // enter local key in place of PK @@ -1896,7 +1899,8 @@ void Dbacc::execACC_LOCKREQ(Signal* signal) jam(); // do abort via ACC_ABORTREQ (immediate) signal->theData[0] = req->accOpPtr; - accAbortReqLab(signal, false); + signal->theData[1] = false; // Dont send abort + accAbortReqLab(signal); releaseOpRec(signal); req->returnCode = AccLockReq::Success; *sig = *req; @@ -1906,7 +1910,8 @@ void Dbacc::execACC_LOCKREQ(Signal* signal) jam(); // do abort via ACC_ABORTREQ (with conf signal) signal->theData[0] = req->accOpPtr; - accAbortReqLab(signal, true); + signal->theData[1] = true; // send abort + accAbortReqLab(signal); releaseOpRec(signal); req->returnCode = AccLockReq::Success; *sig = *req; @@ -2575,7 +2580,8 @@ Dbacc::readTablePk(Uint32 localkey1) memset(ckeys, 0x1f, (fragrecptr.p->keyLength * MAX_XFRM_MULTIPLY) << 2); #endif int ret = c_tup->accReadPk(tableId, fragId, fragPageId, pageIndex, ckeys, true); - ndbrequire(ret > 0); + jamEntry(); + ndbrequire(ret >= 0); return ret; } @@ -2632,7 +2638,7 @@ void Dbacc::getElement(Signal* signal) * - table key for ACCKEYREQ, stored in TUP * - local key (1 word) for ACC_LOCKREQ and UNDO, stored in ACC */ - const bool searchLocalKey = operationRecPtr.p->isAccLockReq; + const bool searchLocalKey = operationRecPtr.p->tupkeylen == 0; ndbrequire(TelemLen == ZELEM_HEAD_SIZE + fragrecptr.p->localkeylen); tgeNextptrtype = ZLEFT; @@ -2775,16 +2781,18 @@ void Dbacc::commitdelete(Signal* signal) jam(); Uint32 localKey = operationRecPtr.p->localdata[0]; Uint32 userptr= operationRecPtr.p->userptr; - + Uint32 scanInd = operationRecPtr.p->operation == 
ZSCAN_OP + || operationRecPtr.p->isAccLockReq; + signal->theData[0] = fragrecptr.p->myfid; signal->theData[1] = fragrecptr.p->myTableId; - signal->theData[2] = operationRecPtr.p->localdata[0]; Uint32 pageId = localKey >> MAX_TUPLES_BITS; Uint32 pageIndex = localKey & ((1 << MAX_TUPLES_BITS) - 1); signal->theData[2] = pageId; signal->theData[3] = pageIndex; signal->theData[4] = userptr; - EXECUTE_DIRECT(DBTUP, GSN_TUP_DEALLOCREQ, signal, 5); + signal->theData[5] = scanInd; + EXECUTE_DIRECT(DBLQH, GSN_TUP_DEALLOCREQ, signal, 6); jamEntry(); getdirindex(signal); @@ -5382,12 +5390,12 @@ void Dbacc::execNEXT_SCANREQ(Signal* signal) scanPtr.p->scanTimer = scanPtr.p->scanContinuebCounter; switch (tscanNextFlag) { - case ZCOPY_NEXT: + case NextScanReq::ZSCAN_NEXT: jam(); /*empty*/; break; - case ZCOPY_NEXT_COMMIT: - case ZCOPY_COMMIT: + case NextScanReq::ZSCAN_NEXT_COMMIT: + case NextScanReq::ZSCAN_COMMIT: jam(); /* --------------------------------------------------------------------- */ /* COMMIT ACTIVE OPERATION. 
@@ -5402,7 +5410,7 @@ void Dbacc::execNEXT_SCANREQ(Signal* signal) takeOutActiveScanOp(signal); releaseOpRec(signal); scanPtr.p->scanOpsAllocated--; - if (tscanNextFlag == ZCOPY_COMMIT) { + if (tscanNextFlag == NextScanReq::ZSCAN_COMMIT) { jam(); signal->theData[0] = scanPtr.p->scanUserptr; Uint32 blockNo = refToBlock(scanPtr.p->scanUserblockref); @@ -5410,7 +5418,7 @@ void Dbacc::execNEXT_SCANREQ(Signal* signal) return; }//if break; - case ZCOPY_CLOSE: + case NextScanReq::ZSCAN_CLOSE: jam(); fragrecptr.i = scanPtr.p->activeLocalFrag; ptrCheckGuard(fragrecptr, cfragmentsize, fragmentrec); @@ -5995,6 +6003,7 @@ void Dbacc::initScanOpRec(Signal* signal) scanPtr.p->scanOpsAllocated++; + operationRecPtr.p->userptr = RNIL; operationRecPtr.p->scanRecPtr = scanPtr.i; operationRecPtr.p->operation = ZSCAN_OP; operationRecPtr.p->transactionstate = ACTIVE; diff --git a/storage/ndb/src/kernel/blocks/dbdict/Dbdict.cpp b/storage/ndb/src/kernel/blocks/dbdict/Dbdict.cpp index 9755a65aa53..9a842995625 100644 --- a/storage/ndb/src/kernel/blocks/dbdict/Dbdict.cpp +++ b/storage/ndb/src/kernel/blocks/dbdict/Dbdict.cpp @@ -353,7 +353,9 @@ void Dbdict::packTableIntoPages(Signal* signal) case DictTabInfo::LogfileGroup:{ FilegroupPtr fg_ptr; ndbrequire(c_filegroup_hash.find(fg_ptr, tableId)); - packFilegroupIntoPages(w, fg_ptr); + const Uint32 free_hi= signal->theData[4]; + const Uint32 free_lo= signal->theData[5]; + packFilegroupIntoPages(w, fg_ptr, free_hi, free_lo); break; } case DictTabInfo::Datafile:{ @@ -420,7 +422,13 @@ Dbdict::packTableIntoPages(SimpleProperties::Writer & w, w.add(DictTabInfo::NoOfVariable, (Uint32)0); w.add(DictTabInfo::KeyLength, tablePtr.p->tupKeyLength); - w.add(DictTabInfo::TableLoggedFlag, tablePtr.p->storedTable); + w.add(DictTabInfo::TableLoggedFlag, + !!(tablePtr.p->m_bits & TableRecord::TR_Logged)); + w.add(DictTabInfo::RowGCIFlag, + !!(tablePtr.p->m_bits & TableRecord::TR_RowGCI)); + w.add(DictTabInfo::RowChecksumFlag, + !!(tablePtr.p->m_bits & 
TableRecord::TR_RowChecksum)); + w.add(DictTabInfo::MinLoadFactor, tablePtr.p->minLoadFactor); w.add(DictTabInfo::MaxLoadFactor, tablePtr.p->maxLoadFactor); w.add(DictTabInfo::TableKValue, tablePtr.p->kValue); @@ -532,7 +540,9 @@ Dbdict::packTableIntoPages(SimpleProperties::Writer & w, void Dbdict::packFilegroupIntoPages(SimpleProperties::Writer & w, - FilegroupPtr fg_ptr){ + FilegroupPtr fg_ptr, + const Uint32 undo_free_hi, + const Uint32 undo_free_lo){ DictFilegroupInfo::Filegroup fg; fg.init(); ConstRope r(c_rope_pool, fg_ptr.p->m_name); @@ -553,6 +563,8 @@ Dbdict::packFilegroupIntoPages(SimpleProperties::Writer & w, break; case DictTabInfo::LogfileGroup: fg.LF_UndoBufferSize = fg_ptr.p->m_logfilegroup.m_undo_buffer_size; + fg.LF_UndoFreeWordsHi= undo_free_hi; + fg.LF_UndoFreeWordsLo= undo_free_lo; //fg.LF_UndoGrow = ; break; default: @@ -1794,7 +1806,7 @@ void Dbdict::initialiseTableRecord(TableRecordPtr tablePtr) tablePtr.p->minLoadFactor = 70; tablePtr.p->noOfPrimkey = 1; tablePtr.p->tupKeyLength = 1; - tablePtr.p->storedTable = true; + tablePtr.p->m_bits = 0; tablePtr.p->tableType = DictTabInfo::UserTable; tablePtr.p->primaryTableId = RNIL; // volatile elements @@ -2309,7 +2321,7 @@ Dbdict::rebuildIndexes(Signal* signal, Uint32 i){ req->setParallelism(16); // from file index state is not defined currently - if (indexPtr.p->storedTable) { + if (indexPtr.p->m_bits & TableRecord::TR_Logged) { // rebuild not needed req->addRequestFlag((Uint32)RequestFlag::RF_NOBUILD); } @@ -2979,6 +2991,26 @@ Dbdict::execGET_TABINFO_CONF(Signal* signal){ signal->theData[4]= free_extents; sendSignal(reference(), GSN_CONTINUEB, signal, 5, JBB); } + else if(refToBlock(conf->senderRef) == LGMAN + && (refToNode(conf->senderRef) == 0 + || refToNode(conf->senderRef) == getOwnNodeId())) + { + jam(); + FilegroupPtr fg_ptr; + ndbrequire(c_filegroup_hash.find(fg_ptr, conf->tableId)); + const Uint32 free_hi= conf->freeWordsHi; + const Uint32 free_lo= conf->freeWordsLo; + const Uint32 id= 
conf->tableId; + const Uint32 type= conf->tableType; + const Uint32 data= conf->senderData; + signal->theData[0]= ZPACK_TABLE_INTO_PAGES; + signal->theData[1]= id; + signal->theData[2]= type; + signal->theData[3]= data; + signal->theData[4]= free_hi; + signal->theData[5]= free_lo; + sendSignal(reference(), GSN_CONTINUEB, signal, 6, JBB); + } else { jam(); @@ -5067,7 +5099,7 @@ Dbdict::createTab_dih(Signal* signal, req->fragType = tabPtr.p->fragmentType; req->kValue = tabPtr.p->kValue; req->noOfReplicas = 0; - req->storedTable = tabPtr.p->storedTable; + req->storedTable = !!(tabPtr.p->m_bits & TableRecord::TR_Logged); req->tableType = tabPtr.p->tableType; req->schemaVersion = tabPtr.p->tableVersion; req->primaryTableId = tabPtr.p->primaryTableId; @@ -5166,6 +5198,7 @@ Dbdict::execADD_FRAGREQ(Signal* signal) { Uint32 fragCount = req->totalFragments; Uint32 requestInfo = req->requestInfo; Uint32 startGci = req->startGci; + Uint32 logPart = req->logPartId; ndbrequire(node == getOwnNodeId()); @@ -5215,11 +5248,12 @@ Dbdict::execADD_FRAGREQ(Signal* signal) { // noOfCharsets passed to TUP in upper half req->noOfNewAttr |= (tabPtr.p->noOfCharsets << 16); req->checksumIndicator = 1; - req->GCPIndicator = 0; + req->GCPIndicator = 1; req->startGci = startGci; req->tableType = tabPtr.p->tableType; req->primaryTableId = tabPtr.p->primaryTableId; req->tablespace_id= tabPtr.p->m_tablespace_id; + req->logPartId = logPart; sendSignal(DBLQH_REF, GSN_LQHFRAGREQ, signal, LqhFragReq::SignalLength, JBB); } @@ -5412,7 +5446,7 @@ Dbdict::execTAB_COMMITCONF(Signal* signal){ signal->theData[0] = tabPtr.i; signal->theData[1] = tabPtr.p->tableVersion; - signal->theData[2] = (Uint32)tabPtr.p->storedTable; + signal->theData[2] = (Uint32)!!(tabPtr.p->m_bits & TableRecord::TR_Logged); signal->theData[3] = reference(); signal->theData[4] = (Uint32)tabPtr.p->tableType; signal->theData[5] = createTabPtr.p->key; @@ -5816,7 +5850,12 @@ void Dbdict::handleTabInfoInit(SimpleProperties::Reader & it, } 
tablePtr.p->noOfAttributes = tableDesc.NoOfAttributes; - tablePtr.p->storedTable = tableDesc.TableLoggedFlag; + tablePtr.p->m_bits |= + (tableDesc.TableLoggedFlag ? TableRecord::TR_Logged : 0); + tablePtr.p->m_bits |= + (tableDesc.RowChecksumFlag ? TableRecord::TR_RowChecksum : 0); + tablePtr.p->m_bits |= + (tableDesc.RowGCIFlag ? TableRecord::TR_RowGCI : 0); tablePtr.p->minLoadFactor = tableDesc.MinLoadFactor; tablePtr.p->maxLoadFactor = tableDesc.MaxLoadFactor; tablePtr.p->fragmentType = (DictTabInfo::FragmentType)tableDesc.FragmentType; @@ -5853,7 +5892,7 @@ void Dbdict::handleTabInfoInit(SimpleProperties::Reader & it, tablePtr.p->buildTriggerId = RNIL; tablePtr.p->indexLocal = 0; - handleTabInfo(it, parseP, tableDesc.TablespaceVersion); + handleTabInfo(it, parseP, tableDesc); if(parseP->errorCode != 0) { @@ -5866,7 +5905,7 @@ void Dbdict::handleTabInfoInit(SimpleProperties::Reader & it, void Dbdict::handleTabInfo(SimpleProperties::Reader & it, ParseDictTabInfoRecord * parseP, - Uint32 tablespaceVersion) + DictTabInfo::Table &tableDesc) { TableRecordPtr tablePtr = parseP->tablePtr; @@ -6105,7 +6144,7 @@ void Dbdict::handleTabInfo(SimpleProperties::Reader & it, tabRequire(false, CreateTableRef::NotATablespace); } - if(tablespacePtr.p->m_version != tablespaceVersion) + if(tablespacePtr.p->m_version != tableDesc.TablespaceVersion) { tabRequire(false, CreateTableRef::InvalidTablespaceVersion); } @@ -7061,6 +7100,18 @@ void Dbdict::execGET_TABINFOREQ(Signal* signal) sendSignal(TSMAN_REF, GSN_GET_TABINFOREQ, signal, GetTabInfoReq::SignalLength, JBB); } + else if(objEntry->m_tableType==DictTabInfo::LogfileGroup) + { + jam(); + GetTabInfoReq *req= (GetTabInfoReq*)signal->theData; + req->senderData= c_retrieveRecord.retrievePage; + req->senderRef= reference(); + req->requestType= GetTabInfoReq::RequestById; + req->tableId= obj_id; + + sendSignal(LGMAN_REF, GSN_GET_TABINFOREQ, signal, + GetTabInfoReq::SignalLength, JBB); + } else { jam(); @@ -7209,7 +7260,7 @@ 
Dbdict::execLIST_TABLES_REQ(Signal* signal) } } // store - if (! tablePtr.p->storedTable) { + if (! (tablePtr.p->m_bits & TableRecord::TR_Logged)) { conf->setTableStore(pos, DictTabInfo::StoreTemporary); } else { conf->setTableStore(pos, DictTabInfo::StorePermanent); @@ -7242,6 +7293,7 @@ Dbdict::execLIST_TABLES_REQ(Signal* signal) conf->tableData[pos] = 0; conf->setTableId(pos, iter.curr.p->m_id); conf->setTableType(pos, type); // type + conf->setTableState(pos, DictTabInfo::StateOnline); // XXX todo pos++; } if (DictTabInfo::isFile(type)){ @@ -7249,6 +7301,7 @@ Dbdict::execLIST_TABLES_REQ(Signal* signal) conf->tableData[pos] = 0; conf->setTableId(pos, iter.curr.p->m_id); conf->setTableType(pos, type); // type + conf->setTableState(pos, DictTabInfo::StateOnline); // XXX todo pos++; } @@ -7570,8 +7623,9 @@ Dbdict::createIndex_toCreateTable(Signal* signal, OpCreateIndexPtr opPtr) indexPtr.i = RNIL; // invalid indexPtr.p = &indexRec; initialiseTableRecord(indexPtr); + indexPtr.p->m_bits = TableRecord::TR_RowChecksum; if (req->getIndexType() == DictTabInfo::UniqueHashIndex) { - indexPtr.p->storedTable = opPtr.p->m_storedIndex; + indexPtr.p->m_bits |= (opPtr.p->m_storedIndex ? 
TableRecord::TR_Logged:0); indexPtr.p->fragmentType = DictTabInfo::DistrKeyUniqueHashIndex; } else if (req->getIndexType() == DictTabInfo::OrderedIndex) { // first version will not supported logging @@ -7581,7 +7635,6 @@ Dbdict::createIndex_toCreateTable(Signal* signal, OpCreateIndexPtr opPtr) opPtr.p->m_errorLine = __LINE__; return; } - indexPtr.p->storedTable = false; indexPtr.p->fragmentType = DictTabInfo::DistrKeyOrderedIndex; } else { jam(); @@ -7665,7 +7718,7 @@ Dbdict::createIndex_toCreateTable(Signal* signal, OpCreateIndexPtr opPtr) indexPtr.p->noOfNullAttr = 0; // write index table w.add(DictTabInfo::TableName, opPtr.p->m_indexName); - w.add(DictTabInfo::TableLoggedFlag, indexPtr.p->storedTable); + w.add(DictTabInfo::TableLoggedFlag, !!(indexPtr.p->m_bits & TableRecord::TR_Logged)); w.add(DictTabInfo::FragmentTypeVal, indexPtr.p->fragmentType); w.add(DictTabInfo::TableTypeVal, indexPtr.p->tableType); Rope name(c_rope_pool, tablePtr.p->tableName); @@ -13817,6 +13870,7 @@ Dbdict::execCREATE_OBJ_REQ(Signal* signal){ createObjPtr.p->m_obj_type = objType; createObjPtr.p->m_obj_version = objVersion; createObjPtr.p->m_obj_info_ptr_i = objInfoPtr.i; + createObjPtr.p->m_obj_ptr_i = RNIL; createObjPtr.p->m_callback.m_callbackData = key; createObjPtr.p->m_callback.m_callbackFunction= @@ -14513,6 +14567,9 @@ Dbdict::create_fg_prepare_start(Signal* signal, SchemaOp* op){ SegmentedSectionPtr objInfoPtr; getSection(objInfoPtr, ((OpCreateObj*)op)->m_obj_info_ptr_i); SimplePropertiesSectionReader it(objInfoPtr, getSectionSegmentPool()); + + Ptr<DictObject> obj_ptr; obj_ptr.setNull(); + FilegroupPtr fg_ptr; fg_ptr.setNull(); SimpleProperties::UnpackStatus status; DictFilegroupInfo::Filegroup fg; fg.init(); @@ -14552,15 +14609,12 @@ Dbdict::create_fg_prepare_start(Signal* signal, SchemaOp* op){ break; } - Ptr<DictObject> obj_ptr; if(!c_obj_pool.seize(obj_ptr)){ op->m_errorCode = CreateTableRef::NoMoreTableRecords; break; } - FilegroupPtr fg_ptr; 
if(!c_filegroup_pool.seize(fg_ptr)){ - c_obj_pool.release(obj_ptr); op->m_errorCode = CreateTableRef::NoMoreTableRecords; break; } @@ -14569,8 +14623,6 @@ Dbdict::create_fg_prepare_start(Signal* signal, SchemaOp* op){ Rope name(c_rope_pool, obj_ptr.p->m_name); if(!name.assign(fg.FilegroupName, len, hash)){ op->m_errorCode = CreateTableRef::TableNameTooLong; - c_obj_pool.release(obj_ptr); - c_filegroup_pool.release(fg_ptr); break; } } @@ -14618,8 +14670,24 @@ Dbdict::create_fg_prepare_start(Signal* signal, SchemaOp* op){ op->m_obj_ptr_i = fg_ptr.i; } while(0); - + error: + if (op->m_errorCode) + { + jam(); + if (!fg_ptr.isNull()) + { + jam(); + c_filegroup_pool.release(fg_ptr); + } + + if (!obj_ptr.isNull()) + { + jam(); + c_obj_pool.release(obj_ptr); + } + } + execute(signal, op->m_callback, 0); } @@ -14690,14 +14758,33 @@ Dbdict::execCREATE_FILEGROUP_CONF(Signal* signal){ void Dbdict::create_fg_abort_start(Signal* signal, SchemaOp* op){ - execute(signal, op->m_callback, 0); - abort(); + CreateFilegroupImplReq* req = + (CreateFilegroupImplReq*)signal->getDataPtrSend(); + + if (op->m_obj_ptr_i != RNIL) + { + jam(); + send_drop_fg(signal, op, DropFilegroupImplReq::Commit); + return; + } + + execute(signal, op->m_callback, 0); } void Dbdict::create_fg_abort_complete(Signal* signal, SchemaOp* op){ + + if (op->m_obj_ptr_i != RNIL) + { + jam(); + FilegroupPtr fg_ptr; + c_filegroup_pool.getPtr(fg_ptr, op->m_obj_ptr_i); + + release_object(fg_ptr.p->m_obj_ptr_i); + c_filegroup_hash.release(fg_ptr); + } + execute(signal, op->m_callback, 0); - abort(); } void @@ -14709,6 +14796,9 @@ Dbdict::create_file_prepare_start(Signal* signal, SchemaOp* op){ getSection(objInfoPtr, ((OpCreateObj*)op)->m_obj_info_ptr_i); SimplePropertiesSectionReader it(objInfoPtr, getSectionSegmentPool()); + Ptr<DictObject> obj_ptr; obj_ptr.setNull(); + FilePtr filePtr; filePtr.setNull(); + DictFilegroupInfo::File f; f.init(); SimpleProperties::UnpackStatus status; status = SimpleProperties::unpack(it, 
&f, @@ -14758,16 +14848,13 @@ Dbdict::create_file_prepare_start(Signal* signal, SchemaOp* op){ } // Loop through all filenames... - Ptr<DictObject> obj_ptr; if(!c_obj_pool.seize(obj_ptr)){ op->m_errorCode = CreateTableRef::NoMoreTableRecords; break; } - FilePtr filePtr; if (! c_file_pool.seize(filePtr)){ op->m_errorCode = CreateFileRef::OutOfFileRecords; - c_obj_pool.release(obj_ptr); break; } @@ -14775,8 +14862,6 @@ Dbdict::create_file_prepare_start(Signal* signal, SchemaOp* op){ Rope name(c_rope_pool, obj_ptr.p->m_name); if(!name.assign(f.FileName, len, hash)){ op->m_errorCode = CreateTableRef::TableNameTooLong; - c_obj_pool.release(obj_ptr); - c_file_pool.release(filePtr); break; } } @@ -14813,6 +14898,22 @@ Dbdict::create_file_prepare_start(Signal* signal, SchemaOp* op){ op->m_obj_ptr_i = filePtr.i; } while(0); + + if (op->m_errorCode) + { + jam(); + if (!filePtr.isNull()) + { + jam(); + c_file_pool.release(filePtr); + } + + if (!obj_ptr.isNull()) + { + jam(); + c_obj_pool.release(obj_ptr); + } + } execute(signal, op->m_callback, 0); } @@ -14839,8 +14940,6 @@ Dbdict::create_file_prepare_complete(Signal* signal, SchemaOp* op){ break; case 1: req->requestInfo = CreateFileImplReq::Open; - if(getNodeState().getNodeRestartInProgress()) - req->requestInfo = CreateFileImplReq::CreateForce; break; case 2: req->requestInfo = CreateFileImplReq::CreateForce; @@ -14946,61 +15045,71 @@ Dbdict::create_file_abort_start(Signal* signal, SchemaOp* op) { CreateFileImplReq* req = (CreateFileImplReq*)signal->getDataPtrSend(); - FilePtr f_ptr; - c_file_pool.getPtr(f_ptr, op->m_obj_ptr_i); - - FilegroupPtr fg_ptr; - ndbrequire(c_filegroup_hash.find(fg_ptr, f_ptr.p->m_filegroup_id)); - - req->senderData = op->key; - req->senderRef = reference(); - req->requestInfo = CreateFileImplReq::Abort; - - req->file_id = f_ptr.p->key; - req->filegroup_id = f_ptr.p->m_filegroup_id; - req->filegroup_version = fg_ptr.p->m_version; - - Uint32 ref= 0; - switch(op->m_obj_type){ - case 
DictTabInfo::Datafile: - ref = TSMAN_REF; - break; - case DictTabInfo::Undofile: - ref = LGMAN_REF; - break; - default: - ndbrequire(false); + if (op->m_obj_ptr_i != RNIL) + { + FilePtr f_ptr; + c_file_pool.getPtr(f_ptr, op->m_obj_ptr_i); + + FilegroupPtr fg_ptr; + ndbrequire(c_filegroup_hash.find(fg_ptr, f_ptr.p->m_filegroup_id)); + + req->senderData = op->key; + req->senderRef = reference(); + req->requestInfo = CreateFileImplReq::Abort; + + req->file_id = f_ptr.p->key; + req->filegroup_id = f_ptr.p->m_filegroup_id; + req->filegroup_version = fg_ptr.p->m_version; + + Uint32 ref= 0; + switch(op->m_obj_type){ + case DictTabInfo::Datafile: + ref = TSMAN_REF; + break; + case DictTabInfo::Undofile: + ref = LGMAN_REF; + break; + default: + ndbrequire(false); + } + + sendSignal(ref, GSN_CREATE_FILE_REQ, signal, + CreateFileImplReq::AbortLength, JBB); + return; } - - sendSignal(ref, GSN_CREATE_FILE_REQ, signal, - CreateFileImplReq::AbortLength, JBB); + + execute(signal, op->m_callback, 0); } void Dbdict::create_file_abort_complete(Signal* signal, SchemaOp* op) { - FilePtr f_ptr; - c_file_pool.getPtr(f_ptr, op->m_obj_ptr_i); - - FilegroupPtr fg_ptr; - ndbrequire(c_filegroup_hash.find(fg_ptr, f_ptr.p->m_filegroup_id)); - - switch(fg_ptr.p->m_type){ - case DictTabInfo::Tablespace: - decrease_ref_count(fg_ptr.p->m_obj_ptr_i); - break; - case DictTabInfo::LogfileGroup: + if (op->m_obj_ptr_i != RNIL) { - LocalDLList<File> list(c_file_pool, fg_ptr.p->m_logfilegroup.m_files); - list.remove(f_ptr); - break; - } - default: - ndbrequire(false); + FilePtr f_ptr; + c_file_pool.getPtr(f_ptr, op->m_obj_ptr_i); + + FilegroupPtr fg_ptr; + ndbrequire(c_filegroup_hash.find(fg_ptr, f_ptr.p->m_filegroup_id)); + + switch(fg_ptr.p->m_type){ + case DictTabInfo::Tablespace: + decrease_ref_count(fg_ptr.p->m_obj_ptr_i); + break; + case DictTabInfo::LogfileGroup: + { + LocalDLList<File> list(c_file_pool, fg_ptr.p->m_logfilegroup.m_files); + list.remove(f_ptr); + break; + } + default: + 
ndbrequire(false); + } + + release_object(f_ptr.p->m_obj_ptr_i); + c_file_pool.release(f_ptr); } - release_object(f_ptr.p->m_obj_ptr_i); - execute(signal, op->m_callback, 0); } @@ -15036,7 +15145,8 @@ Dbdict::drop_file_commit_complete(Signal* signal, SchemaOp* op) decrease_ref_count(fg_ptr.p->m_obj_ptr_i); release_object(f_ptr.p->m_obj_ptr_i); - + c_file_pool.release(f_ptr); + execute(signal, op->m_callback, 0); } @@ -15223,7 +15333,8 @@ Dbdict::drop_fg_commit_complete(Signal* signal, SchemaOp* op) c_filegroup_pool.getPtr(fg_ptr, op->m_obj_ptr_i); release_object(fg_ptr.p->m_obj_ptr_i); - + c_filegroup_hash.release(fg_ptr); + execute(signal, op->m_callback, 0); } diff --git a/storage/ndb/src/kernel/blocks/dbdict/Dbdict.hpp b/storage/ndb/src/kernel/blocks/dbdict/Dbdict.hpp index 112543d5831..f2b0210288a 100644 --- a/storage/ndb/src/kernel/blocks/dbdict/Dbdict.hpp +++ b/storage/ndb/src/kernel/blocks/dbdict/Dbdict.hpp @@ -228,6 +228,15 @@ public: /* Global checkpoint identity when table created */ Uint32 gciTableCreated; + /* Is the table logged (i.e. data survives system restart) */ + enum Bits + { + TR_Logged = 0x1, + TR_RowGCI = 0x2, + TR_RowChecksum = 0x4 + }; + Uint16 m_bits; + /* Number of attibutes in table */ Uint16 noOfAttributes; @@ -266,9 +275,6 @@ public: */ Uint8 minLoadFactor; - /* Is the table logged (i.e. 
data survives system restart) */ - bool storedTable; - /* Convenience routines */ bool isTable() const; bool isIndex() const; @@ -508,6 +514,7 @@ public: Uint32 m_filegroup_id; Uint32 m_type; Uint64 m_file_size; + Uint64 m_file_free; RopeHandle m_path; Uint32 nextList; @@ -2001,7 +2008,10 @@ private: AttributeRecordPtr & attrPtr); void packTableIntoPages(Signal* signal); void packTableIntoPages(SimpleProperties::Writer &, TableRecordPtr, Signal* =0); - void packFilegroupIntoPages(SimpleProperties::Writer &, FilegroupPtr); + void packFilegroupIntoPages(SimpleProperties::Writer &, + FilegroupPtr, + const Uint32 undo_free_hi, + const Uint32 undo_free_lo); void packFileIntoPages(SimpleProperties::Writer &, FilePtr, const Uint32); void sendGET_TABINFOREQ(Signal* signal, @@ -2026,7 +2036,7 @@ private: ParseDictTabInfoRecord *, bool checkExist = true); void handleTabInfo(SimpleProperties::Reader & it, ParseDictTabInfoRecord *, - Uint32 tablespaceVersion); + DictTabInfo::Table & tableDesc); void handleAddTableFailure(Signal* signal, Uint32 failureLine, diff --git a/storage/ndb/src/kernel/blocks/dbdih/Dbdih.hpp b/storage/ndb/src/kernel/blocks/dbdih/Dbdih.hpp index 345d1bdac0e..fb8372a6c9c 100644 --- a/storage/ndb/src/kernel/blocks/dbdih/Dbdih.hpp +++ b/storage/ndb/src/kernel/blocks/dbdih/Dbdih.hpp @@ -238,6 +238,8 @@ public: Uint32 storedReplicas; /* "ALIVE" STORED REPLICAS */ Uint32 nextFragmentChunk; + Uint32 m_log_part_id; + Uint8 distributionKey; Uint8 fragReplicas; Uint8 noOldStoredReplicas; /* NUMBER OF "DEAD" STORED REPLICAS */ @@ -545,7 +547,9 @@ public: TO_END_COPY = 19, TO_END_COPY_ONGOING = 20, TO_WAIT_ENDING = 21, - ENDING = 22 + ENDING = 22, + + STARTING_LOCAL_FRAGMENTS = 24 }; enum ToSlaveStatus { TO_SLAVE_IDLE = 0, @@ -974,7 +978,9 @@ private: void initialiseRecordsLab(Signal *, Uint32 stepNo, Uint32, Uint32); void findReplica(ReplicaRecordPtr& regReplicaPtr, - Fragmentstore* fragPtrP, Uint32 nodeId); + Fragmentstore* fragPtrP, + Uint32 nodeId, + bool 
oldStoredReplicas = false); //------------------------------------ // Node failure handling methods //------------------------------------ @@ -1132,6 +1138,10 @@ private: void setNodeCopyCompleted(Uint32 nodeId, bool newState); bool checkNodeAlive(Uint32 nodeId); + void nr_start_fragments(Signal*, TakeOverRecordPtr); + void nr_start_fragment(Signal*, TakeOverRecordPtr, ReplicaRecordPtr); + void nr_run_redo(Signal*, TakeOverRecordPtr); + // Initialisation void initData(); void initRecords(); @@ -1158,7 +1168,8 @@ private: Uint32 c_nextNodeGroup; NodeGroupRecord *nodeGroupRecord; - + Uint32 c_nextLogPart; + NodeRecord *nodeRecord; PageRecord *pageRecord; diff --git a/storage/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp b/storage/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp index cd923b83978..61b60cbc294 100644 --- a/storage/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp +++ b/storage/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp @@ -609,6 +609,14 @@ void Dbdih::execCONTINUEB(Signal* signal) checkWaitDropTabFailedLqh(signal, nodeId, tableId); return; } + case DihContinueB::ZTO_START_FRAGMENTS: + { + TakeOverRecordPtr takeOverPtr; + takeOverPtr.i = signal->theData[1]; + ptrCheckGuard(takeOverPtr, MAX_NDB_NODES, takeOverRecord); + nr_start_fragments(signal, takeOverPtr); + return; + } }//switch ndbrequire(false); @@ -1771,11 +1779,6 @@ void Dbdih::execSTART_MEREQ(Signal* signal) ndbrequire(c_nodeStartMaster.startNode == Tnodeid); ndbrequire(getNodeStatus(Tnodeid) == NodeRecord::STARTING); - sendSTART_RECREQ(signal, Tnodeid); -}//Dbdih::execSTART_MEREQ() - -void Dbdih::nodeRestartStartRecConfLab(Signal* signal) -{ c_nodeStartMaster.blockLcp = true; if ((c_lcpState.lcpStatus != LCP_STATUS_IDLE) && (c_lcpState.lcpStatus != LCP_TCGET)) { @@ -2586,13 +2589,14 @@ void Dbdih::sendStartTo(Signal* signal, Uint32 takeOverPtrI) return; }//if c_startToLock = takeOverPtrI; + + takeOverPtr.p->toMasterStatus = TakeOverRecord::STARTING; StartToReq * const req = (StartToReq *)&signal->theData[0]; 
req->userPtr = takeOverPtr.i; req->userRef = reference(); req->startingNodeId = takeOverPtr.p->toStartingNode; req->nodeTakenOver = takeOverPtr.p->toFailedNode; req->nodeRestart = takeOverPtr.p->toNodeRestart; - takeOverPtr.p->toMasterStatus = TakeOverRecord::STARTING; sendLoopMacro(START_TOREQ, sendSTART_TOREQ); }//Dbdih::sendStartTo() @@ -2636,9 +2640,153 @@ void Dbdih::execSTART_TOCONF(Signal* signal) CRASH_INSERTION(7134); c_startToLock = RNIL; + if (takeOverPtr.p->toNodeRestart) + { + jam(); + takeOverPtr.p->toMasterStatus = TakeOverRecord::STARTING_LOCAL_FRAGMENTS; + nr_start_fragments(signal, takeOverPtr); + return; + } + startNextCopyFragment(signal, takeOverPtr.i); }//Dbdih::execSTART_TOCONF() +void +Dbdih::nr_start_fragments(Signal* signal, + TakeOverRecordPtr takeOverPtr) +{ + Uint32 loopCount = 0 ; + TabRecordPtr tabPtr; + while (loopCount++ < 100) { + tabPtr.i = takeOverPtr.p->toCurrentTabref; + if (tabPtr.i >= ctabFileSize) { + jam(); + nr_run_redo(signal, takeOverPtr); + return; + }//if + ptrAss(tabPtr, tabRecord); + if (tabPtr.p->tabStatus != TabRecord::TS_ACTIVE){ + jam(); + takeOverPtr.p->toCurrentFragid = 0; + takeOverPtr.p->toCurrentTabref++; + continue; + }//if + Uint32 fragId = takeOverPtr.p->toCurrentFragid; + if (fragId >= tabPtr.p->totalfragments) { + jam(); + takeOverPtr.p->toCurrentFragid = 0; + takeOverPtr.p->toCurrentTabref++; + continue; + }//if + FragmentstorePtr fragPtr; + getFragstore(tabPtr.p, fragId, fragPtr); + ReplicaRecordPtr loopReplicaPtr; + loopReplicaPtr.i = fragPtr.p->oldStoredReplicas; + while (loopReplicaPtr.i != RNIL) { + ptrCheckGuard(loopReplicaPtr, creplicaFileSize, replicaRecord); + if (loopReplicaPtr.p->procNode == takeOverPtr.p->toStartingNode) { + jam(); + nr_start_fragment(signal, takeOverPtr, loopReplicaPtr); + break; + } else { + jam(); + loopReplicaPtr.i = loopReplicaPtr.p->nextReplica; + }//if + }//while + takeOverPtr.p->toCurrentFragid++; + }//while + signal->theData[0] = DihContinueB::ZTO_START_FRAGMENTS; 
+ signal->theData[1] = takeOverPtr.i; + sendSignal(reference(), GSN_CONTINUEB, signal, 2, JBB); +} + +void +Dbdih::nr_start_fragment(Signal* signal, + TakeOverRecordPtr takeOverPtr, + ReplicaRecordPtr replicaPtr) +{ + Uint32 i, j = 0; + Uint32 maxLcpId = 0; + Uint32 maxLcpIndex = ~0; + + Uint32 restorableGCI = 0; + + ndbout_c("tab: %d frag: %d replicaP->nextLcp: %d", + takeOverPtr.p->toCurrentTabref, + takeOverPtr.p->toCurrentFragid, + replicaPtr.p->nextLcp); + + Uint32 idx = replicaPtr.p->nextLcp; + for(i = 0; i<MAX_LCP_STORED; i++, idx = nextLcpNo(idx)) + { + ndbout_c("scanning idx: %d lcpId: %d", idx, replicaPtr.p->lcpId[idx]); + if (replicaPtr.p->lcpStatus[idx] == ZVALID) + { + ndbrequire(replicaPtr.p->lcpId[idx] > maxLcpId); + Uint32 startGci = replicaPtr.p->maxGciCompleted[idx]; + Uint32 stopGci = replicaPtr.p->maxGciStarted[idx]; + for (;j < replicaPtr.p->noCrashedReplicas; j++) + { + ndbout_c("crashed replica: %d(%d) replicaLastGci: %d", + j, + replicaPtr.p->noCrashedReplicas, + replicaPtr.p->replicaLastGci[j]); + if (replicaPtr.p->replicaLastGci[j] > stopGci) + { + maxLcpId = replicaPtr.p->lcpId[idx]; + maxLcpIndex = idx; + restorableGCI = replicaPtr.p->replicaLastGci[j]; + break; + } + } + } + } + + if (maxLcpIndex == ~0) + { + ndbout_c("Didnt find any LCP for node: %d tab: %d frag: %d", + takeOverPtr.p->toStartingNode, + takeOverPtr.p->toCurrentTabref, + takeOverPtr.p->toCurrentFragid); + replicaPtr.p->lcpIdStarted = 0; + } + else + { + ndbout_c("Found LCP: %d(%d) maxGciStarted: %d maxGciCompleted: %d restorable: %d(%d) newestRestorableGCI: %d", + maxLcpId, + maxLcpIndex, + replicaPtr.p->maxGciStarted[maxLcpIndex], + replicaPtr.p->maxGciCompleted[maxLcpIndex], + restorableGCI, + SYSFILE->lastCompletedGCI[takeOverPtr.p->toStartingNode], + SYSFILE->newestRestorableGCI); + + replicaPtr.p->lcpIdStarted = restorableGCI; + BlockReference ref = calcLqhBlockRef(takeOverPtr.p->toStartingNode); + StartFragReq *req = (StartFragReq *)signal->getDataPtrSend(); + 
req->userPtr = 0; + req->userRef = reference(); + req->lcpNo = maxLcpIndex; + req->lcpId = maxLcpId; + req->tableId = takeOverPtr.p->toCurrentTabref; + req->fragId = takeOverPtr.p->toCurrentFragid; + req->noOfLogNodes = 1; + req->lqhLogNode[0] = takeOverPtr.p->toStartingNode; + req->startGci[0] = replicaPtr.p->maxGciCompleted[maxLcpIndex]; + req->lastGci[0] = restorableGCI; + sendSignal(ref, GSN_START_FRAGREQ, signal, + StartFragReq::SignalLength, JBB); + } +} + +void +Dbdih::nr_run_redo(Signal* signal, TakeOverRecordPtr takeOverPtr) +{ + takeOverPtr.p->toCurrentTabref = 0; + takeOverPtr.p->toCurrentFragid = 0; + sendSTART_RECREQ(signal, takeOverPtr.p->toStartingNode); +} + void Dbdih::initStartTakeOver(const StartToReq * req, TakeOverRecordPtr takeOverPtr) { @@ -2971,6 +3119,14 @@ void Dbdih::execCREATE_FRAGCONF(Signal* signal) /*---------------------------------------------------------------------- */ FragmentstorePtr fragPtr; getFragstore(tabPtr.p, fragId, fragPtr); + Uint32 gci = 0; + if (takeOverPtr.p->toNodeRestart) + { + ReplicaRecordPtr replicaPtr; + findReplica(replicaPtr, fragPtr.p, takeOverPtr.p->toStartingNode, true); + gci = replicaPtr.p->lcpIdStarted; + replicaPtr.p->lcpIdStarted = 0; + } takeOverPtr.p->toMasterStatus = TakeOverRecord::COPY_FRAG; BlockReference ref = calcLqhBlockRef(takeOverPtr.p->toCopyNode); CopyFragReq * const copyFragReq = (CopyFragReq *)&signal->theData[0]; @@ -2981,6 +3137,7 @@ void Dbdih::execCREATE_FRAGCONF(Signal* signal) copyFragReq->nodeId = takeOverPtr.p->toStartingNode; copyFragReq->schemaVersion = tabPtr.p->schemaVersion; copyFragReq->distributionKey = fragPtr.p->distributionKey; + copyFragReq->gci = gci; sendSignal(ref, GSN_COPY_FRAGREQ, signal, CopyFragReq::SignalLength, JBB); } else { ndbrequire(takeOverPtr.p->toMasterStatus == TakeOverRecord::COMMIT_CREATE); @@ -4033,6 +4190,8 @@ void Dbdih::checkTakeOverInMasterStartNodeFailure(Signal* signal, Uint32 takeOverPtrI) { jam(); + 
ndbout_c("checkTakeOverInMasterStartNodeFailure %x", + takeOverPtrI); if (takeOverPtrI == RNIL) { jam(); return; @@ -4046,6 +4205,9 @@ void Dbdih::checkTakeOverInMasterStartNodeFailure(Signal* signal, takeOverPtr.i = takeOverPtrI; ptrCheckGuard(takeOverPtr, MAX_NDB_NODES, takeOverRecord); + ndbout_c("takeOverPtr.p->toMasterStatus: %x", + takeOverPtr.p->toMasterStatus); + bool ok = false; switch (takeOverPtr.p->toMasterStatus) { case TakeOverRecord::IDLE: @@ -4154,6 +4316,13 @@ void Dbdih::checkTakeOverInMasterStartNodeFailure(Signal* signal, //----------------------------------------------------------------------- endTakeOver(takeOverPtr.i); break; + + case TakeOverRecord::STARTING_LOCAL_FRAGMENTS: + ok = true; + jam(); + endTakeOver(takeOverPtr.i); + break; + /** * The following are states that it should not be possible to "be" in */ @@ -6585,6 +6754,8 @@ void Dbdih::execDIADDTABREQ(Signal* signal) Uint32 activeIndex = 0; getFragstore(tabPtr.p, fragId, fragPtr); fragPtr.p->preferredPrimary = fragments[index]; + fragPtr.p->m_log_part_id = c_nextLogPart++; + for (Uint32 i = 0; i<noReplicas; i++) { const Uint32 nodeId = fragments[index++]; ReplicaRecordPtr replicaPtr; @@ -6629,9 +6800,9 @@ Dbdih::sendAddFragreq(Signal* signal, ConnectRecordPtr connectPtr, jam(); const Uint32 fragCount = tabPtr.p->totalfragments; ReplicaRecordPtr replicaPtr; replicaPtr.i = RNIL; + FragmentstorePtr fragPtr; for(; fragId<fragCount; fragId++){ jam(); - FragmentstorePtr fragPtr; getFragstore(tabPtr.p, fragId, fragPtr); replicaPtr.i = fragPtr.p->storedReplicas; @@ -6689,6 +6860,7 @@ Dbdih::sendAddFragreq(Signal* signal, ConnectRecordPtr connectPtr, req->nodeId = getOwnNodeId(); req->totalFragments = fragCount; req->startGci = SYSFILE->newestRestorableGCI; + req->logPartId = fragPtr.p->m_log_part_id; sendSignal(DBDICT_REF, GSN_ADD_FRAGREQ, signal, AddFragReq::SignalLength, JBB); return; @@ -8875,8 +9047,8 @@ void Dbdih::execSTART_RECCONF(Signal* signal) // otherwise we have a problem. 
/* --------------------------------------------------------------------- */ jam(); - ndbrequire(senderNodeId == c_nodeStartMaster.startNode); - nodeRestartStartRecConfLab(signal); + ndbout_c("startNextCopyFragment"); + startNextCopyFragment(signal, findTakeOver(senderNodeId)); return; } else { /* --------------------------------------------------------------------- */ @@ -9895,9 +10067,11 @@ Dbdih::checkLcpAllTablesDoneInLqh(){ } void Dbdih::findReplica(ReplicaRecordPtr& replicaPtr, - Fragmentstore* fragPtrP, Uint32 nodeId) + Fragmentstore* fragPtrP, + Uint32 nodeId, + bool old) { - replicaPtr.i = fragPtrP->storedReplicas; + replicaPtr.i = old ? fragPtrP->oldStoredReplicas : fragPtrP->storedReplicas; while(replicaPtr.i != RNIL){ ptrCheckGuard(replicaPtr, creplicaFileSize, replicaRecord); if (replicaPtr.p->procNode == nodeId) { @@ -11168,6 +11342,7 @@ void Dbdih::initCommonData() currentgcp = 0; cverifyQueueCounter = 0; cwaitLcpSr = false; + c_nextLogPart = 0; nodeResetStart(); c_nodeStartMaster.wait = ZFALSE; @@ -11972,6 +12147,8 @@ void Dbdih::readFragment(RWFragment* rf, FragmentstorePtr fragPtr) jam(); fragPtr.p->distributionKey = TdistKey; }//if + + fragPtr.p->m_log_part_id = readPageWord(rf); }//Dbdih::readFragment() Uint32 Dbdih::readPageWord(RWFragment* rf) @@ -13062,6 +13239,7 @@ void Dbdih::writeFragment(RWFragment* wf, FragmentstorePtr fragPtr) writePageWord(wf, fragPtr.p->noStoredReplicas); writePageWord(wf, fragPtr.p->noOldStoredReplicas); writePageWord(wf, fragPtr.p->distributionKey); + writePageWord(wf, fragPtr.p->m_log_part_id); }//Dbdih::writeFragment() void Dbdih::writePageWord(RWFragment* wf, Uint32 dataWord) diff --git a/storage/ndb/src/kernel/blocks/dblqh/Dblqh.hpp b/storage/ndb/src/kernel/blocks/dblqh/Dblqh.hpp index 7d2d45651ae..bb3941d099c 100644 --- a/storage/ndb/src/kernel/blocks/dblqh/Dblqh.hpp +++ b/storage/ndb/src/kernel/blocks/dblqh/Dblqh.hpp @@ -142,7 +142,7 @@ /* 
------------------------------------------------------------------------- */ #define ZFD_HEADER_SIZE 3 #define ZFD_PART_SIZE 48 -#define ZLOG_HEAD_SIZE 6 +#define ZLOG_HEAD_SIZE 8 #define ZNEXT_LOG_SIZE 2 #define ZABORT_LOG_SIZE 3 #define ZCOMMIT_LOG_SIZE 9 @@ -264,15 +264,6 @@ #define ZSTORED_PROC_SCAN 0 #define ZSTORED_PROC_COPY 2 #define ZDELETE_STORED_PROC_ID 3 -//#define ZSCAN_NEXT 1 -//#define ZSCAN_NEXT_COMMIT 2 -//#define ZSCAN_NEXT_ABORT 12 -#define ZCOPY_COMMIT 3 -#define ZCOPY_REPEAT 4 -#define ZCOPY_ABORT 5 -#define ZCOPY_CLOSE 6 -//#define ZSCAN_CLOSE 6 -//#define ZEMPTY_FRAGMENT 0 #define ZWRITE_LOCK 1 #define ZSCAN_FRAG_CLOSED 2 /* ------------------------------------------------------------------------- */ @@ -525,7 +516,7 @@ public: Uint32 scan_acc_index; Uint32 scan_acc_attr_recs; UintR scanApiOpPtr; - UintR scanLocalref[2]; + Local_key m_row_id; Uint32 m_max_batch_size_rows; Uint32 m_max_batch_size_bytes; @@ -553,7 +544,6 @@ public: UintR scanAccPtr; UintR scanAiLength; UintR scanErrorCounter; - UintR scanLocalFragid; UintR scanSchemaVersion; /** @@ -747,10 +737,16 @@ public: FragStatus fragStatus; /** - * Indicates a local checkpoint is active and thus can generate - * UNDO log records. + * 0 = undefined i.e fragStatus != ACTIVE_CREATION + * 1 = yes + * 2 = no */ - UintR fragActiveStatus; + enum ActiveCreat { + AC_NORMAL = 0, // fragStatus != ACTIVE_CREATION + AC_IGNORED = 1, // Operation that got ignored during NR + AC_NR_COPY = 2 // Operation that got performed during NR + }; + Uint8 m_copy_started_state; /** * This flag indicates whether logging is currently activated at @@ -889,6 +885,11 @@ public: * fragment in primary table. 
*/ UintR tableFragptr; + + /** + * Log part + */ + Uint32 m_log_part_ptr_i; }; typedef Ptr<Fragrecord> FragrecordPtr; @@ -2030,7 +2031,16 @@ public: Uint8 seqNoReplica; Uint8 tcNodeFailrec; Uint8 m_disk_table; - Uint32 m_local_key; + Uint8 m_use_rowid; + Uint8 m_dealloc; + Uint32 m_log_part_ptr_i; + Local_key m_row_id; + + struct { + Uint32 m_cnt; + Uint32 m_page_id[2]; + Local_key m_disk_ref[2]; + } m_nr_delete; }; /* p2c: size = 280 bytes */ typedef Ptr<TcConnectionrec> TcConnectionrecPtr; @@ -2095,10 +2105,6 @@ private: void execBUILDINDXCONF(Signal*signal); void execDUMP_STATE_ORD(Signal* signal); - void execACC_COM_BLOCK(Signal* signal); - void execACC_COM_UNBLOCK(Signal* signal); - void execTUP_COM_BLOCK(Signal* signal); - void execTUP_COM_UNBLOCK(Signal* signal); void execACC_ABORTCONF(Signal* signal); void execNODE_FAILREP(Signal* signal); void execCHECK_LCP_STOP(Signal* signal); @@ -2181,6 +2187,7 @@ private: void execDROP_TAB_REQ(Signal* signal); void execLQH_ALLOCREQ(Signal* signal); + void execTUP_DEALLOCREQ(Signal* signal); void execLQH_WRITELOG_REQ(Signal* signal); void execTUXFRAGCONF(Signal* signal); @@ -2234,7 +2241,7 @@ private: Uint32 sendKeyinfo20(Signal* signal, ScanRecord *, TcConnectionrec *); void sendScanFragConf(Signal* signal, Uint32 scanCompleted); void initCopyrec(Signal* signal); - void initCopyTc(Signal* signal); + void initCopyTc(Signal* signal, Operation_t); void sendCopyActiveConf(Signal* signal,Uint32 tableId); void checkLcpCompleted(Signal* signal); void checkLcpHoldop(Signal* signal); @@ -2277,7 +2284,7 @@ private: void checkReadExecSr(Signal* signal); void checkScanTcCompleted(Signal* signal); void checkSrCompleted(Signal* signal); - void closeFile(Signal* signal, LogFileRecordPtr logFilePtr); + void closeFile(Signal* signal, LogFileRecordPtr logFilePtr, Uint32 place); void completedLogPage(Signal* signal, Uint32 clpType, Uint32 place); void deleteFragrec(Uint32 fragId); void deleteTransidHash(Signal* signal); @@ -2561,10 
+2568,26 @@ private: void acckeyconf_tupkeyreq(Signal*, TcConnectionrec*, Fragrecord*, Uint32, Uint32); void acckeyconf_load_diskpage(Signal*,TcConnectionrecPtr,Fragrecord*,Uint32); - + + void handle_nr_copy(Signal*, Ptr<TcConnectionrec>); + void exec_acckeyreq(Signal*, Ptr<TcConnectionrec>); + int compare_key(const TcConnectionrec*, const Uint32 * ptr, Uint32 len); + void nr_copy_delete_row(Signal*, Ptr<TcConnectionrec>, Local_key*, Uint32); +public: + struct Nr_op_info + { + Uint32 m_ptr_i; + Uint32 m_tup_frag_ptr_i; + Uint32 m_gci; + Uint32 m_page_id; + Local_key m_disk_ref; + }; + void get_nr_op_info(Nr_op_info*, Uint32 page_id = RNIL); + void nr_delete_complete(Signal*, Nr_op_info*); + public: void acckeyconf_load_diskpage_callback(Signal*, Uint32, Uint32); - + private: void next_scanconf_load_diskpage(Signal* signal, ScanRecordPtr scanPtr, @@ -2823,11 +2846,6 @@ private: UintR cLqhTimeOutCount; UintR cLqhTimeOutCheckCount; UintR cnoOfLogPages; - bool caccCommitBlocked; - bool ctupCommitBlocked; - bool cCommitBlocked; - UintR cCounterAccCommitBlocked; - UintR cCounterTupCommitBlocked; /* ------------------------------------------------------------------------- */ /*THIS VARIABLE CONTAINS MY OWN PROCESSOR ID. */ /* ------------------------------------------------------------------------- */ @@ -2847,17 +2865,12 @@ private: Uint16 cpackedList[MAX_NDB_NODES]; UintR cnodeData[MAX_NDB_NODES]; UintR cnodeStatus[MAX_NDB_NODES]; -/* ------------------------------------------------------------------------- */ -/*THIS VARIABLE INDICATES WHETHER A CERTAIN NODE HAS SENT ALL FRAGMENTS THAT */ -/*NEED TO HAVE THE LOG EXECUTED. 
*/ -/* ------------------------------------------------------------------------- */ - Uint8 cnodeSrState[MAX_NDB_NODES]; -/* ------------------------------------------------------------------------- */ -/*THIS VARIABLE INDICATES WHETHER A CERTAIN NODE HAVE EXECUTED THE LOG */ -/* ------------------------------------------------------------------------- */ - Uint8 cnodeExecSrState[MAX_NDB_NODES]; UintR cnoOfNodes; + NdbNodeBitmask m_sr_nodes; + NdbNodeBitmask m_sr_exec_sr_req; + NdbNodeBitmask m_sr_exec_sr_conf; + /* ------------------------------------------------------------------------- */ /* THIS VARIABLE CONTAINS THE DIRECTORY OF A HASH TABLE OF ALL ACTIVE */ /* OPERATION IN THE BLOCK. IT IS USED TO BE ABLE TO QUICKLY ABORT AN */ @@ -2985,6 +2998,10 @@ Dblqh::accminupdate(Signal* signal, Uint32 opId, const Local_key* key) signal->theData[0] = regTcPtr.p->accConnectrec; signal->theData[1] = key->m_page_no << MAX_TUPLES_BITS | key->m_page_idx; c_acc->execACCMINUPDATE(signal); + + if (ERROR_INSERTED(5712)) + ndbout << " LK: " << *key; + regTcPtr.p->m_row_id = *key; } diff --git a/storage/ndb/src/kernel/blocks/dblqh/DblqhInit.cpp b/storage/ndb/src/kernel/blocks/dblqh/DblqhInit.cpp index ca7a8bb9631..06b52d9b31b 100644 --- a/storage/ndb/src/kernel/blocks/dblqh/DblqhInit.cpp +++ b/storage/ndb/src/kernel/blocks/dblqh/DblqhInit.cpp @@ -221,10 +221,6 @@ Dblqh::Dblqh(const class Configuration & conf): addRecSignal(GSN_DROP_TRIG_REF, &Dblqh::execDROP_TRIG_REF); addRecSignal(GSN_DUMP_STATE_ORD, &Dblqh::execDUMP_STATE_ORD); - addRecSignal(GSN_ACC_COM_BLOCK, &Dblqh::execACC_COM_BLOCK); - addRecSignal(GSN_ACC_COM_UNBLOCK, &Dblqh::execACC_COM_UNBLOCK); - addRecSignal(GSN_TUP_COM_BLOCK, &Dblqh::execTUP_COM_BLOCK); - addRecSignal(GSN_TUP_COM_UNBLOCK, &Dblqh::execTUP_COM_UNBLOCK); addRecSignal(GSN_NODE_FAILREP, &Dblqh::execNODE_FAILREP); addRecSignal(GSN_CHECK_LCP_STOP, &Dblqh::execCHECK_LCP_STOP); addRecSignal(GSN_SEND_PACKED, &Dblqh::execSEND_PACKED); @@ -301,6 +297,7 @@ 
Dblqh::Dblqh(const class Configuration & conf): addRecSignal(GSN_LQH_ALLOCREQ, &Dblqh::execLQH_ALLOCREQ); addRecSignal(GSN_LQH_WRITELOG_REQ, &Dblqh::execLQH_WRITELOG_REQ); + addRecSignal(GSN_TUP_DEALLOCREQ, &Dblqh::execTUP_DEALLOCREQ); // TUX addRecSignal(GSN_TUXFRAGCONF, &Dblqh::execTUXFRAGCONF); diff --git a/storage/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp b/storage/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp index c9e13186909..e9211285d71 100644 --- a/storage/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp +++ b/storage/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp @@ -118,57 +118,19 @@ operator<<(NdbOut& out, Dblqh::ScanRecord::ScanType state){ const Uint32 NR_ScanNo = 0; -void Dblqh::execACC_COM_BLOCK(Signal* signal) -{ - jamEntry(); -/* ------------------------------------------------------------------------- */ -// Undo log buffer in ACC is in critical sector of being full. -/* ------------------------------------------------------------------------- */ - cCounterAccCommitBlocked++; - caccCommitBlocked = true; - cCommitBlocked = true; - return; -}//Dblqh::execACC_COM_BLOCK() - -void Dblqh::execACC_COM_UNBLOCK(Signal* signal) -{ - jamEntry(); -/* ------------------------------------------------------------------------- */ -// Undo log buffer in ACC ok again. -/* ------------------------------------------------------------------------- */ - caccCommitBlocked = false; - if (ctupCommitBlocked == false) { - jam(); - cCommitBlocked = false; - }//if - return; -}//Dblqh::execACC_COM_UNBLOCK() - -void Dblqh::execTUP_COM_BLOCK(Signal* signal) -{ - jamEntry(); -/* ------------------------------------------------------------------------- */ -// Undo log buffer in TUP is in critical sector of being full. 
-/* ------------------------------------------------------------------------- */ - cCounterTupCommitBlocked++; - ctupCommitBlocked = true; - cCommitBlocked = true; - return; -}//Dblqh::execTUP_COM_BLOCK() - -void Dblqh::execTUP_COM_UNBLOCK(Signal* signal) -{ - jamEntry(); -/* ------------------------------------------------------------------------- */ -// Undo log buffer in TUP ok again. -/* ------------------------------------------------------------------------- */ - ctupCommitBlocked = false; - if (caccCommitBlocked == false) { - jam(); - cCommitBlocked = false; - }//if - return; -}//Dblqh::execTUP_COM_UNBLOCK() +#if defined VM_TRACE || defined ERROR_INSERT || defined NDBD_TRACENR +static FileOutputStream tracenr_fos(fopen("tracenr.log", "w+")); +NdbOut tracenrout(tracenr_fos); +static int TRACENR_FLAG = 0; +#define TRACENR(x) tracenrout << x +#define SET_TRACENR_FLAG TRACENR_FLAG = 1 +#define CLEAR_TRACENR_FLAG TRACENR_FLAG = 0 +#else +#define TRACENR_FLAG 0 +#define TRACENR(x) +#define SET_TRACENR_FLAG +#define CLEAR_TRACENR_FLAG +#endif /* ------------------------------------------------------------------------- */ /* ------- SEND SYSTEM ERROR ------- */ @@ -242,12 +204,6 @@ void Dblqh::execCONTINUEB(Signal* signal) ptrCheckGuard(tcConnectptr, ctcConnectrecFileSize, tcConnectionrec); fragptr.i = tcConnectptr.p->fragmentptr; c_fragment_pool.getPtr(fragptr); - if ((cCommitBlocked == true) && - (fragptr.p->fragActiveStatus == ZTRUE)) { - jam(); - sendSignalWithDelay(cownref, GSN_CONTINUEB, signal, 10, 2); - return; - }//if logPartPtr.p->LogLqhKeyReqSent = ZFALSE; getFirstInLogQueue(signal); @@ -289,12 +245,13 @@ void Dblqh::execCONTINUEB(Signal* signal) writeCommitLog(signal, logPartPtr); logNextStart(signal); if (tcConnectptr.p->transactionState == TcConnectionrec::LOG_COMMIT_QUEUED) { - if (tcConnectptr.p->seqNoReplica != 0) { + if (tcConnectptr.p->seqNoReplica == 0 || + tcConnectptr.p->activeCreat == Fragrecord::AC_NR_COPY) { jam(); - commitReplyLab(signal); 
+ localCommitLab(signal); } else { jam(); - localCommitLab(signal); + commitReplyLab(signal); }//if return; } else { @@ -556,12 +513,13 @@ void Dblqh::execNDB_STTOR(Signal* signal) preComputedRequestInfoMask = 0; LqhKeyReq::setKeyLen(preComputedRequestInfoMask, RI_KEYLEN_MASK); LqhKeyReq::setLastReplicaNo(preComputedRequestInfoMask, RI_LAST_REPL_MASK); - LqhKeyReq::setLockType(preComputedRequestInfoMask, RI_LOCK_TYPE_MASK); // Dont LqhKeyReq::setApplicationAddressFlag LqhKeyReq::setDirtyFlag(preComputedRequestInfoMask, 1); // Dont LqhKeyReq::setInterpretedFlag LqhKeyReq::setSimpleFlag(preComputedRequestInfoMask, 1); LqhKeyReq::setOperation(preComputedRequestInfoMask, RI_OPERATION_MASK); + LqhKeyReq::setGCIFlag(preComputedRequestInfoMask, 1); + LqhKeyReq::setNrCopyFlag(preComputedRequestInfoMask, 1); // Dont setAIInLqhKeyReq // Dont setSeqNoReplica // Dont setSameClientAndTcFlag @@ -813,6 +771,7 @@ void Dblqh::execREAD_NODESCONF(Signal* signal) jam(); if (NodeBitmask::get(readNodes->allNodes, i)) { jam(); + m_sr_nodes.set(i); cnodeData[ind] = i; cnodeStatus[ind] = NodeBitmask::get(readNodes->inactiveNodes, i); //readNodes->getVersionId(i, readNodes->theVersionIds) not used @@ -824,11 +783,23 @@ void Dblqh::execREAD_NODESCONF(Signal* signal) ndbrequire(!(cnoOfNodes == 1 && cstartType == NodeState::ST_NODE_RESTART)); caddNodeState = ZFALSE; - if (cstartType == NodeState::ST_SYSTEM_RESTART) { + if (cstartType == NodeState::ST_SYSTEM_RESTART) + { jam(); sendNdbSttorryLab(signal); return; - }//if + } + else if (cstartType == NodeState::ST_NODE_RESTART) + { + jam(); + SET_TRACENR_FLAG; + m_sr_nodes.clear(); + m_sr_nodes.set(getOwnNodeId()); + sendNdbSttorryLab(signal); + return; + } + SET_TRACENR_FLAG; + checkStartCompletedLab(signal); return; }//Dblqh::execREAD_NODESCONF() @@ -858,6 +829,7 @@ void Dblqh::startphase6Lab(Signal* signal) { cstartPhase = ZNIL; cstartType = ZNIL; + CLEAR_TRACENR_FLAG; sendNdbSttorryLab(signal); return; }//Dblqh::startphase6Lab() @@ -979,6 
+951,13 @@ void Dblqh::execLQHFRAGREQ(Signal* signal) Uint32 tableType = req->tableType; Uint32 primaryTableId = req->primaryTableId; Uint32 tablespace= req->tablespace_id; + Uint32 logPart = req->logPartId; + + if (signal->getLength() < 20) + { + logPart = (fragId & 1) + 2 * (tabptr.i & 1); + } + logPart &= 3; ptrCheckGuard(tabptr, ctabrecFileSize, tablerec); bool tempTable = ((reqinfo & LqhFragReq::TemporaryTable) != 0); @@ -1021,7 +1000,8 @@ void Dblqh::execLQHFRAGREQ(Signal* signal) fragptr.p->startGci = startGci; fragptr.p->newestGci = startGci; fragptr.p->tableType = tableType; - + fragptr.p->m_log_part_ptr_i = logPart; // assumes array + if (DictTabInfo::isOrderedIndex(tableType)) { jam(); // find corresponding primary table fragment @@ -1385,9 +1365,14 @@ void Dblqh::execTUP_ADD_ATTCONF(Signal* signal) if (addfragptr.p->fragCopyCreation == 1) { jam(); if (! DictTabInfo::isOrderedIndex(addfragptr.p->tableType)) + { + fragptr.p->m_copy_started_state = Fragrecord::AC_IGNORED; fragptr.p->fragStatus = Fragrecord::ACTIVE_CREATION; + } else + { fragptr.p->fragStatus = Fragrecord::FSACTIVE; + } fragptr.p->logFlag = Fragrecord::STATE_FALSE; } else { jam(); @@ -2019,17 +2004,6 @@ void Dblqh::execTIME_SIGNAL(Signal* signal) jamEntry(); cLqhTimeOutCount++; cLqhTimeOutCheckCount++; - if ((cCounterAccCommitBlocked > 0) || - (cCounterTupCommitBlocked > 0)) { - jam(); - signal->theData[0] = NDB_LE_UndoLogBlocked; - signal->theData[1] = cCounterTupCommitBlocked; - signal->theData[2] = cCounterAccCommitBlocked; - sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 3, JBB); - - cCounterTupCommitBlocked = 0; - cCounterAccCommitBlocked = 0; - }//if if (cLqhTimeOutCheckCount < 10) { jam(); return; @@ -2527,6 +2501,7 @@ void Dblqh::execTUPKEYREF(Signal* signal) c_fragment_pool.getPtr(regFragptr); fragptr = regFragptr; + TcConnectionrec* regTcPtr = tcConnectptr.p; switch (tcConnectptr.p->transactionState) { case TcConnectionrec::WAIT_TUP: jam(); @@ -3191,7 +3166,7 @@ void 
Dblqh::execLQHKEYREQ(Signal* signal) c_Counters.operations++; TcConnectionrec * const regTcPtr = tcConnectptr.p; - regTcPtr->clientBlockref = signal->senderBlockRef(); + Uint32 senderRef = regTcPtr->clientBlockref = signal->senderBlockRef(); regTcPtr->clientConnectrec = sig0; regTcPtr->tcOprec = sig0; regTcPtr->storedProcId = ZNIL; @@ -3210,10 +3185,12 @@ void Dblqh::execLQHKEYREQ(Signal* signal) regTcPtr->tcBlockref = sig5; const Uint8 op = LqhKeyReq::getOperation(Treqinfo); - if (op == ZREAD && !getAllowRead()){ + if ((op == ZREAD || op == ZREAD_EX) && !getAllowRead()){ noFreeRecordLab(signal, lqhKeyReq, ZNODE_SHUTDOWN_IN_PROGESS); return; } + + Uint32 senderVersion = getNodeInfo(refToNode(senderRef)).m_version; regTcPtr->totReclenAi = LqhKeyReq::getAttrLen(TtotReclenAi); regTcPtr->tcScanInfo = lqhKeyReq->scanInfo; @@ -3268,15 +3245,26 @@ void Dblqh::execLQHKEYREQ(Signal* signal) regTcPtr->reqinfo = Treqinfo; regTcPtr->lastReplicaNo = LqhKeyReq::getLastReplicaNo(Treqinfo); - regTcPtr->lockType = LqhKeyReq::getLockType(Treqinfo); regTcPtr->dirtyOp = LqhKeyReq::getDirtyFlag(Treqinfo); regTcPtr->opExec = LqhKeyReq::getInterpretedFlag(Treqinfo); regTcPtr->opSimple = LqhKeyReq::getSimpleFlag(Treqinfo); - regTcPtr->operation = LqhKeyReq::getOperation(Treqinfo); - regTcPtr->simpleRead = regTcPtr->operation == ZREAD && regTcPtr->opSimple; + regTcPtr->simpleRead = op == ZREAD && regTcPtr->opSimple; regTcPtr->seqNoReplica = LqhKeyReq::getSeqNoReplica(Treqinfo); UintR TreclenAiLqhkey = LqhKeyReq::getAIInLqhKeyReq(Treqinfo); regTcPtr->apiVersionNo = 0; + regTcPtr->m_use_rowid = LqhKeyReq::getRowidFlag(Treqinfo); + regTcPtr->m_dealloc = 0; + if (unlikely(senderVersion < NDBD_ROWID_VERSION)) + { + regTcPtr->operation = op; + regTcPtr->lockType = LqhKeyReq::getLockType(Treqinfo); + } + else + { + regTcPtr->operation = op == ZREAD_EX ? ZREAD : op; + regTcPtr->lockType = + op == ZREAD_EX ? ZUPDATE : op == ZWRITE ? 
ZINSERT : op; + } CRASH_INSERTION2(5041, regTcPtr->simpleRead && refToNode(signal->senderBlockRef()) != cownNodeid); @@ -3327,11 +3315,33 @@ void Dblqh::execLQHKEYREQ(Signal* signal) } else { nextPos += 4; }//if - } else { + } + else if (! (LqhKeyReq::getNrCopyFlag(Treqinfo))) + { LQHKEY_error(signal, 3); return; }//if - + + sig0 = lqhKeyReq->variableData[nextPos + 0]; + sig1 = lqhKeyReq->variableData[nextPos + 1]; + regTcPtr->m_row_id.m_page_no = sig0; + regTcPtr->m_row_id.m_page_idx = sig1; + nextPos += 2 * LqhKeyReq::getRowidFlag(Treqinfo); + + sig2 = lqhKeyReq->variableData[nextPos + 0]; + sig3 = cnewestGci; + regTcPtr->gci = LqhKeyReq::getGCIFlag(Treqinfo) ? sig2 : sig3; + nextPos += LqhKeyReq::getGCIFlag(Treqinfo); + + if (LqhKeyReq::getRowidFlag(Treqinfo)) + { + ndbassert(refToBlock(senderRef) != DBTC); + } + else if(op == ZINSERT) + { + ndbassert(refToBlock(senderRef) == DBTC); + } + if ((LqhKeyReq::FixedSignalLength + nextPos + TreclenAiLqhkey) != signal->length()) { LQHKEY_error(signal, 2); @@ -3398,18 +3408,34 @@ void Dblqh::execLQHKEYREQ(Signal* signal) LQHKEY_error(signal, 6); return; }//if + + if (LqhKeyReq::getNrCopyFlag(Treqinfo)) + { + ndbassert(refToBlock(senderRef) == DBLQH); + ndbassert(LqhKeyReq::getRowidFlag(Treqinfo)); + if (! 
(fragptr.p->fragStatus == Fragrecord::ACTIVE_CREATION)) + { + ndbout_c("fragptr.p->fragStatus: %d", + fragptr.p->fragStatus); + } + ndbassert(fragptr.p->fragStatus == Fragrecord::ACTIVE_CREATION); + fragptr.p->m_copy_started_state = Fragrecord::AC_NR_COPY; + } + Uint8 TcopyType = fragptr.p->fragCopy; + Uint32 logPart = fragptr.p->m_log_part_ptr_i; tfragDistKey = fragptr.p->fragDistributionKey; if (fragptr.p->fragStatus == Fragrecord::ACTIVE_CREATION) { jam(); - regTcPtr->activeCreat = ZTRUE; + regTcPtr->activeCreat = fragptr.p->m_copy_started_state; CRASH_INSERTION(5002); CRASH_INSERTION2(5042, tabptr.i == c_error_insert_table_id); } else { - regTcPtr->activeCreat = ZFALSE; + regTcPtr->activeCreat = Fragrecord::AC_NORMAL; }//if regTcPtr->replicaType = TcopyType; regTcPtr->fragmentptr = fragptr.i; + regTcPtr->m_log_part_ptr_i = logPart; Uint8 TdistKey = LqhKeyReq::getDistributionKey(TtotReclenAi); if ((tfragDistKey != TdistKey) && (regTcPtr->seqNoReplica == 0) && @@ -3607,7 +3633,6 @@ void Dblqh::endgettupkeyLab(Signal* signal) void Dblqh::prepareContinueAfterBlockedLab(Signal* signal) { UintR ttcScanOp; - UintR taccreq; /* -------------------------------------------------------------------------- */ /* INPUT: TC_CONNECTPTR ACTIVE CONNECTION RECORD */ @@ -3620,6 +3645,7 @@ void Dblqh::prepareContinueAfterBlockedLab(Signal* signal) /* -------------------------------------------------------------------------- */ Uint32 tc_ptr_i = tcConnectptr.i; TcConnectionrec * const regTcPtr = tcConnectptr.p; + Uint32 activeCreat = regTcPtr->activeCreat; if (regTcPtr->indTakeOver == ZTRUE) { jam(); ttcScanOp = KeyInfo20::getScanOp(regTcPtr->tcScanInfo); @@ -3668,36 +3694,75 @@ void Dblqh::prepareContinueAfterBlockedLab(Signal* signal) /* BEFORE SENDING THE MESSAGE THE REQUEST INFORMATION IS SET */ /* PROPERLY. 
*/ /* ----------------------------------------------------------------- */ -#if 0 - if (regTcPtr->tableref != 0) { + if (TRACENR_FLAG) + { switch (regTcPtr->operation) { - case ZREAD: ndbout << "Läsning "; break; - case ZUPDATE: ndbout << " Uppdatering "; break; - case ZWRITE: ndbout << "Write "; break; - case ZINSERT: ndbout << "Inläggning "; break; - case ZDELETE: ndbout << "Borttagning "; break; - default: ndbout << "????"; break; + case ZREAD: TRACENR("READ"); break; + case ZUPDATE: TRACENR("UPDATE"); break; + case ZWRITE: TRACENR("WRITE"); break; + case ZINSERT: TRACENR("INSERT"); break; + case ZDELETE: TRACENR("DELETE"); break; + default: TRACENR("<Unknown: " << regTcPtr->operation << ">"); break; } - ndbout << "med nyckel = " << regTcPtr->tupkeyData[0] << endl; + + TRACENR(" tab: " << regTcPtr->tableref + << " frag: " << regTcPtr->fragmentid + << " activeCreat: " << (Uint32)activeCreat); + if (LqhKeyReq::getNrCopyFlag(regTcPtr->reqinfo)) + TRACENR(" NrCopy"); + if (LqhKeyReq::getRowidFlag(regTcPtr->reqinfo)) + TRACENR(" rowid: " << regTcPtr->m_row_id); + TRACENR(" key: " << regTcPtr->tupkeyData[0]); } -#endif - regTcPtr->transactionState = TcConnectionrec::WAIT_ACC; - taccreq = regTcPtr->operation; - taccreq = taccreq + (regTcPtr->opSimple << 3); - taccreq = taccreq + (regTcPtr->lockType << 4); - taccreq = taccreq + (regTcPtr->dirtyOp << 6); - taccreq = taccreq + (regTcPtr->replicaType << 7); - taccreq = taccreq + (regTcPtr->apiVersionNo << 9); + if (likely(activeCreat == Fragrecord::AC_NORMAL)) + { + if (TRACENR_FLAG) + TRACENR(endl); + ndbassert(!LqhKeyReq::getNrCopyFlag(regTcPtr->reqinfo)); + exec_acckeyreq(signal, tcConnectptr); + } + else if (activeCreat == Fragrecord::AC_NR_COPY) + { + handle_nr_copy(signal, tcConnectptr); + } + else + { + ndbassert(activeCreat == Fragrecord::AC_IGNORED); + if (TRACENR_FLAG) + TRACENR(" IGNORING (activeCreat == 2)" << endl); + + signal->theData[0] = tc_ptr_i; + regTcPtr->transactionState = 
TcConnectionrec::WAIT_ACC_ABORT; + + signal->theData[0] = regTcPtr->tupConnectrec; + EXECUTE_DIRECT(DBTUP, GSN_TUP_ABORTREQ, signal, 1); + jamEntry(); + + execACC_ABORTCONF(signal); + } +} + +void +Dblqh::exec_acckeyreq(Signal* signal, TcConnectionrecPtr regTcPtr) +{ + Uint32 taccreq; + regTcPtr.p->transactionState = TcConnectionrec::WAIT_ACC; + taccreq = regTcPtr.p->operation; + taccreq = taccreq + (regTcPtr.p->opSimple << 3); + taccreq = taccreq + (regTcPtr.p->lockType << 4); + taccreq = taccreq + (regTcPtr.p->dirtyOp << 6); + taccreq = taccreq + (regTcPtr.p->replicaType << 7); + taccreq = taccreq + (regTcPtr.p->apiVersionNo << 9); /* ************ */ /* ACCKEYREQ < */ /* ************ */ Uint32 sig0, sig1, sig2, sig3, sig4; - sig0 = regTcPtr->accConnectrec; + sig0 = regTcPtr.p->accConnectrec; sig1 = fragptr.p->accFragptr; - sig2 = regTcPtr->hashValue; - sig3 = regTcPtr->primKeyLen; - sig4 = regTcPtr->transid[0]; + sig2 = regTcPtr.p->hashValue; + sig3 = regTcPtr.p->primKeyLen; + sig4 = regTcPtr.p->transid[0]; signal->theData[0] = sig0; signal->theData[1] = sig1; signal->theData[2] = taccreq; @@ -3705,39 +3770,430 @@ void Dblqh::prepareContinueAfterBlockedLab(Signal* signal) signal->theData[4] = sig3; signal->theData[5] = sig4; - sig0 = regTcPtr->transid[1]; - sig1 = regTcPtr->tupkeyData[0]; - sig2 = regTcPtr->tupkeyData[1]; - sig3 = regTcPtr->tupkeyData[2]; - sig4 = regTcPtr->tupkeyData[3]; + sig0 = regTcPtr.p->transid[1]; + sig1 = regTcPtr.p->tupkeyData[0]; + sig2 = regTcPtr.p->tupkeyData[1]; + sig3 = regTcPtr.p->tupkeyData[2]; + sig4 = regTcPtr.p->tupkeyData[3]; signal->theData[6] = sig0; signal->theData[7] = sig1; signal->theData[8] = sig2; signal->theData[9] = sig3; signal->theData[10] = sig4; - if (regTcPtr->primKeyLen > 4) { + if (regTcPtr.p->primKeyLen > 4) { sendKeyinfoAcc(signal, 11); }//if - EXECUTE_DIRECT(refToBlock(regTcPtr->tcAccBlockref), GSN_ACCKEYREQ, - signal, 7 + regTcPtr->primKeyLen); + EXECUTE_DIRECT(refToBlock(regTcPtr.p->tcAccBlockref), 
GSN_ACCKEYREQ, + signal, 7 + regTcPtr.p->primKeyLen); if (signal->theData[0] < RNIL) { - signal->theData[0] = tc_ptr_i; + signal->theData[0] = regTcPtr.i; execACCKEYCONF(signal); return; } else if (signal->theData[0] == RNIL) { ; } else { ndbrequire(signal->theData[0] == (UintR)-1); - signal->theData[0] = tc_ptr_i; + signal->theData[0] = regTcPtr.i; execACCKEYREF(signal); }//if return; }//Dblqh::prepareContinueAfterBlockedLab() -/* ========================================================================== */ -/* ======= SEND KEYINFO TO ACC ======= */ -/* */ -/* ========================================================================== */ +void +Dblqh::handle_nr_copy(Signal* signal, Ptr<TcConnectionrec> regTcPtr) +{ + jam(); + Uint32 tableId = regTcPtr.p->tableref; + Uint32 fragPtr = fragptr.p->tupFragptr; + Uint32 op = regTcPtr.p->operation; + + const bool copy = LqhKeyReq::getNrCopyFlag(regTcPtr.p->reqinfo); + + if (!LqhKeyReq::getRowidFlag(regTcPtr.p->reqinfo)) + { + /** + * Rowid not set, that mean that primary has finished copying... + */ + jam(); + if (TRACENR_FLAG) + TRACENR(" Waiting for COPY_ACTIVEREQ" << endl); + ndbassert(!LqhKeyReq::getNrCopyFlag(regTcPtr.p->reqinfo)); + regTcPtr.p->activeCreat = Fragrecord::AC_NORMAL; + exec_acckeyreq(signal, regTcPtr); + return; + } + + regTcPtr.p->m_nr_delete.m_cnt = 1; // Wait for real op aswell + Uint32* dst = signal->theData+24; + bool uncommitted; + const int len = c_tup->nr_read_pk(fragPtr, &regTcPtr.p->m_row_id, dst, + uncommitted); + const bool match = (len>0) ? 
compare_key(regTcPtr.p, dst, len) == 0 : false; + + if (TRACENR_FLAG) + TRACENR(" len: " << len << " match: " << match + << " uncommitted: " << uncommitted); + + if (copy) + { + ndbassert(LqhKeyReq::getGCIFlag(regTcPtr.p->reqinfo)); + if (match) + { + /** + * Case 1 + */ + jam(); + ndbassert(op == ZINSERT); + if (TRACENR_FLAG) + TRACENR(" Changing from INSERT to ZUPDATE" << endl); + regTcPtr.p->operation = ZUPDATE; + goto run; + } + else if (len > 0 && op == ZDELETE) + { + /** + * Case 4 + * Perform delete using rowid + * primKeyLen == 0 + * tupkeyData[0] == rowid + */ + jam(); + ndbassert(regTcPtr.p->primKeyLen == 0); + if (TRACENR_FLAG) + TRACENR(" performing DELETE key: " + << dst[0] << endl); + regTcPtr.p->tupkeyData[0] = regTcPtr.p->m_row_id.ref(); + if (g_key_descriptor_pool.getPtr(tableId)->hasCharAttr) + { + regTcPtr.p->hashValue = calculateHash(tableId, dst); + } + else + { + regTcPtr.p->hashValue = md5_hash((Uint64*)dst, len); + } + goto run; + } + else if (len == 0 && op == ZDELETE) + { + /** + * Case 7 + */ + jam(); + if (TRACENR_FLAG) + TRACENR(" UPDATE_GCI" << endl); + c_tup->nr_update_gci(fragPtr, &regTcPtr.p->m_row_id, regTcPtr.p->gci); + goto update_gci_ignore; + } + + /** + * 1) Delete row at specified rowid (if len > 0) + * 2) Delete specified row at different rowid (if exists) + * 3) Run insert + */ + if (len > 0) + { + /** + * 1) Delete row at specified rowid (if len > 0) + */ + jam(); + nr_copy_delete_row(signal, regTcPtr, &regTcPtr.p->m_row_id, len); + } + /** + * 2) Delete specified row at different rowid (if exists) + */ + jam(); + nr_copy_delete_row(signal, regTcPtr, 0, 0); + if (TRACENR_FLAG) + TRACENR(" RUN INSERT" << endl); + goto run; + } + else + { + if (!match && op != ZINSERT) + { + jam(); + if (TRACENR_FLAG) + TRACENR(" IGNORE " << endl); + goto ignore; + } + if (match) + { + jam(); + if (op != ZDELETE) + { + if (TRACENR_FLAG) + TRACENR(" Changing from to ZWRITE" << endl); + regTcPtr.p->operation = ZWRITE; + } + goto run; + } + + /** + * 
1) Delete row at specified rowid (if len > 0) + * 2) Delete specified row at different rowid (if exists) + * 3) Run insert + */ + if (len > 0) + { + /** + * 1) Delete row at specified rowid (if len > 0) + */ + jam(); + nr_copy_delete_row(signal, regTcPtr, &regTcPtr.p->m_row_id, len); + } + + /** + * 2) Delete specified row at different rowid (if exists) + */ + jam(); + nr_copy_delete_row(signal, regTcPtr, 0, 0); + if (TRACENR_FLAG) + TRACENR(" RUN op: " << op << endl); + goto run; + } + +run: + jam(); + exec_acckeyreq(signal, regTcPtr); + return; + +ignore: + jam(); + ndbassert(!LqhKeyReq::getNrCopyFlag(regTcPtr.p->reqinfo)); +update_gci_ignore: + regTcPtr.p->activeCreat = Fragrecord::AC_IGNORED; + signal->theData[0] = regTcPtr.p->tupConnectrec; + EXECUTE_DIRECT(DBTUP, GSN_TUP_ABORTREQ, signal, 1); + + regTcPtr.p->transactionState = TcConnectionrec::WAIT_ACC_ABORT; + signal->theData[0] = regTcPtr.i; + execACC_ABORTCONF(signal); +} + +int +Dblqh::compare_key(const TcConnectionrec* regTcPtr, + const Uint32 * ptr, Uint32 len) +{ + if (regTcPtr->primKeyLen != len) + return 1; + + if (len <= 4) + return memcmp(ptr, regTcPtr->tupkeyData, 4*len); + + if (memcmp(ptr, regTcPtr->tupkeyData, sizeof(regTcPtr->tupkeyData))) + return 1; + + len -= (sizeof(regTcPtr->tupkeyData) >> 2); + ptr += (sizeof(regTcPtr->tupkeyData) >> 2); + + DatabufPtr regDatabufptr; + regDatabufptr.i = tcConnectptr.p->firstTupkeybuf; + ptrCheckGuard(regDatabufptr, cdatabufFileSize, databuf); + while(len > 4) + { + if (memcmp(ptr, regDatabufptr.p, 4*4)) + return 1; + + ptr += 4; + len -= 4; + regDatabufptr.i = regDatabufptr.p->nextDatabuf; + ptrCheckGuard(regDatabufptr, cdatabufFileSize, databuf); + } + + if (memcmp(ptr, regDatabufptr.p, 4*len)) + return 1; + + return 0; +} + +void +Dblqh::nr_copy_delete_row(Signal* signal, + Ptr<TcConnectionrec> regTcPtr, + Local_key* rowid, Uint32 len) +{ + Ptr<Fragrecord> fragPtr = fragptr; + + Uint32 keylen; + Uint32 tableId = regTcPtr.p->tableref; + Uint32 accPtr = 
regTcPtr.p->accConnectrec; + + signal->theData[0] = accPtr; + signal->theData[1] = fragptr.p->accFragptr; + signal->theData[2] = ZDELETE + (ZDELETE << 4); + signal->theData[5] = regTcPtr.p->transid[0]; + signal->theData[6] = regTcPtr.p->transid[1]; + + if (rowid) + { + jam(); + keylen = 1; + if (g_key_descriptor_pool.getPtr(tableId)->hasCharAttr) + { + signal->theData[3] = calculateHash(tableId, signal->theData+24); + } + else + { + signal->theData[3] = md5_hash((Uint64*)(signal->theData+24), len); + } + signal->theData[4] = 0; // seach by local key + signal->theData[7] = rowid->ref(); + } + else + { + jam(); + keylen = regTcPtr.p->primKeyLen; + signal->theData[3] = regTcPtr.p->hashValue; + signal->theData[4] = keylen; + signal->theData[7] = regTcPtr.p->tupkeyData[0]; + signal->theData[8] = regTcPtr.p->tupkeyData[1]; + signal->theData[9] = regTcPtr.p->tupkeyData[2]; + signal->theData[10] = regTcPtr.p->tupkeyData[3]; + if (keylen > 4) + sendKeyinfoAcc(signal, 11); + } + const Uint32 ref = refToBlock(regTcPtr.p->tcAccBlockref); + EXECUTE_DIRECT(ref, GSN_ACCKEYREQ, signal, 7 + keylen); + jamEntry(); + + Uint32 retValue = signal->theData[0]; + ndbrequire(retValue != RNIL); // This should never block... 
+ ndbrequire(retValue != (Uint32)-1 || rowid == 0); // rowid should never fail + + if (retValue == (Uint32)-1) + { + /** + * Only delete by pk, may fail + */ + jam(); + ndbrequire(rowid == 0); + signal->theData[0] = accPtr; + signal->theData[1] = false; + EXECUTE_DIRECT(ref, GSN_ACC_ABORTREQ, signal, 2); + jamEntry(); + return; + } + + /** + * We found row (and have it locked in ACC) + */ + ndbrequire(regTcPtr.p->m_dealloc == 0); + Local_key save = regTcPtr.p->m_row_id; + signal->theData[0] = regTcPtr.p->accConnectrec; + EXECUTE_DIRECT(ref, GSN_ACC_COMMITREQ, signal, 1); + jamEntry(); + + ndbrequire(regTcPtr.p->m_dealloc == 1); + int ret = c_tup->nr_delete(signal, regTcPtr.i, + fragPtr.p->tupFragptr, &regTcPtr.p->m_row_id, + regTcPtr.p->gci); + jamEntry(); + + if (ret) + { + ndbassert(ret == 1); + Uint32 pos = regTcPtr.p->m_nr_delete.m_cnt - 1; + memcpy(regTcPtr.p->m_nr_delete.m_disk_ref + pos, + signal->theData, sizeof(Local_key)); + regTcPtr.p->m_nr_delete.m_page_id[pos] = RNIL; + regTcPtr.p->m_nr_delete.m_cnt = pos + 2; + ndbout << "PENDING DISK DELETE: " << + regTcPtr.p->m_nr_delete.m_disk_ref[pos] << endl; + } + + TRACENR("DELETED: " << regTcPtr.p->m_row_id << endl); + + regTcPtr.p->m_dealloc = 0; + regTcPtr.p->m_row_id = save; + fragptr = fragPtr; + tcConnectptr = regTcPtr; +} + +void +Dblqh::get_nr_op_info(Nr_op_info* op, Uint32 page_id) +{ + Ptr<TcConnectionrec> tcPtr; + tcPtr.i = op->m_ptr_i; + ptrCheckGuard(tcPtr, ctcConnectrecFileSize, tcConnectionrec); + + Ptr<Fragrecord> fragPtr; + c_fragment_pool.getPtr(fragPtr, tcPtr.p->fragmentptr); + + op->m_gci = tcPtr.p->gci; + op->m_tup_frag_ptr_i = fragPtr.p->tupFragptr; + + ndbrequire(tcPtr.p->transactionState == TcConnectionrec::WAIT_TUP_COMMIT); + ndbrequire(tcPtr.p->activeCreat == Fragrecord::AC_NR_COPY); + ndbrequire(tcPtr.p->m_nr_delete.m_cnt); + + + if (page_id == RNIL) + { + // get log buffer callback + for (Uint32 i = 0; i<2; i++) + { + if (tcPtr.p->m_nr_delete.m_page_id[i] != RNIL) + { + op->m_page_id = 
tcPtr.p->m_nr_delete.m_page_id[i]; + op->m_disk_ref = tcPtr.p->m_nr_delete.m_disk_ref[i]; + return; + } + } + } + else + { + // get page callback + for (Uint32 i = 0; i<2; i++) + { + Local_key key = tcPtr.p->m_nr_delete.m_disk_ref[i]; + if (op->m_disk_ref.m_page_no == key.m_page_no && + op->m_disk_ref.m_file_no == key.m_file_no && + tcPtr.p->m_nr_delete.m_page_id[i] == RNIL) + { + op->m_disk_ref = key; + tcPtr.p->m_nr_delete.m_page_id[i] = page_id; + return; + } + } + } + ndbrequire(false); +} + +void +Dblqh::nr_delete_complete(Signal* signal, Nr_op_info* op) +{ + jamEntry(); + Ptr<TcConnectionrec> tcPtr; + tcPtr.i = op->m_ptr_i; + ptrCheckGuard(tcPtr, ctcConnectrecFileSize, tcConnectionrec); + + ndbrequire(tcPtr.p->transactionState == TcConnectionrec::WAIT_TUP_COMMIT); + ndbrequire(tcPtr.p->activeCreat == Fragrecord::AC_NR_COPY); + ndbrequire(tcPtr.p->m_nr_delete.m_cnt); + + tcPtr.p->m_nr_delete.m_cnt--; + if (tcPtr.p->m_nr_delete.m_cnt == 0) + { + tcConnectptr = tcPtr; + c_fragment_pool.getPtr(fragptr, tcPtr.p->fragmentptr); + packLqhkeyreqLab(signal); + return; + } + + if (memcmp(&tcPtr.p->m_nr_delete.m_disk_ref[0], + &op->m_disk_ref, sizeof(Local_key)) == 0) + { + jam(); + ndbassert(tcPtr.p->m_nr_delete.m_page_id[0] != RNIL); + tcPtr.p->m_nr_delete.m_page_id[0] = tcPtr.p->m_nr_delete.m_page_id[1]; + tcPtr.p->m_nr_delete.m_disk_ref[0] = tcPtr.p->m_nr_delete.m_disk_ref[1]; + } +} + + +/* =*======================================================================= */ +/* ======= SEND KEYINFO TO ACC ======= */ +/* */ +/* ========================================================================= */ void Dblqh::sendKeyinfoAcc(Signal* signal, Uint32 Ti) { DatabufPtr regDatabufptr; @@ -3778,6 +4234,43 @@ void Dblqh::execLQH_ALLOCREQ(Signal* signal) EXECUTE_DIRECT(tup, GSN_TUP_ALLOCREQ, signal, 3); }//Dblqh::execTUP_ALLOCREQ() +void Dblqh::execTUP_DEALLOCREQ(Signal* signal) +{ + TcConnectionrecPtr regTcPtr; + + jamEntry(); + regTcPtr.i = signal->theData[4]; + + if 
(TRACENR_FLAG) + { + Local_key tmp; + tmp.m_page_no = signal->theData[2]; + tmp.m_page_idx = signal->theData[3]; + TRACENR("TUP_DEALLOC: " << tmp << + (signal->theData[5] ? " DIRECT " : " DELAYED") << endl); + } + + if (signal->theData[5]) + { + jam(); + Local_key tmp; + tmp.m_page_no = signal->theData[2]; + tmp.m_page_idx = signal->theData[3]; + EXECUTE_DIRECT(DBTUP, GSN_TUP_DEALLOCREQ, signal, signal->getLength()); + return; + } + else + { + jam(); + ptrCheckGuard(regTcPtr, ctcConnectrecFileSize, tcConnectionrec); + regTcPtr.p->m_row_id.m_page_no = signal->theData[2]; + regTcPtr.p->m_row_id.m_page_idx = signal->theData[3]; + + ndbrequire(regTcPtr.p->m_dealloc == 0); + regTcPtr.p->m_dealloc = 1; + } +}//Dblqh::execTUP_ALLOCREQ() + /* ************>> */ /* ACCKEYCONF > */ /* ************>> */ @@ -3799,7 +4292,6 @@ void Dblqh::execACCKEYCONF(Signal* signal) }//if // reset the activeCreat since that is only valid in cases where the record was not present. - regTcPtr->activeCreat = ZFALSE; /* ------------------------------------------------------------------------ * IT IS NOW TIME TO CONTACT THE TUPLE MANAGER. 
THE TUPLE MANAGER NEEDS THE * INFORMATION ON WHICH TABLE AND FRAGMENT, THE LOCAL KEY AND IT NEEDS TO @@ -3810,18 +4302,30 @@ void Dblqh::execACCKEYCONF(Signal* signal) * ----------------------------------------------------------------------- */ if (regTcPtr->operation == ZWRITE) { + ndbassert(regTcPtr->seqNoReplica == 0 || + regTcPtr->activeCreat == Fragrecord::AC_NR_COPY); Uint32 op= signal->theData[1]; + Uint32 requestInfo = regTcPtr->reqinfo; if(likely(op == ZINSERT || op == ZUPDATE)) { + jam(); regTcPtr->operation = op; } else { + jam(); warningEvent("Convering %d to ZUPDATE", op); op = regTcPtr->operation = ZUPDATE; } + if (regTcPtr->seqNoReplica == 0) + { + jam(); + requestInfo &= ~(RI_OPERATION_MASK << RI_OPERATION_SHIFT); + LqhKeyReq::setOperation(requestInfo, op); + regTcPtr->reqinfo = requestInfo; + } }//if - + /* ------------------------------------------------------------------------ * IT IS NOW TIME TO CONTACT THE TUPLE MANAGER. THE TUPLE MANAGER NEEDS THE * INFORMATION ON WHICH TABLE AND FRAGMENT, THE LOCAL KEY AND IT NEEDS TO @@ -3847,6 +4351,7 @@ Dblqh::acckeyconf_tupkeyreq(Signal* signal, TcConnectionrec* regTcPtr, Uint32 local_key, Uint32 disk_page) { + Uint32 op = regTcPtr->operation; regTcPtr->transactionState = TcConnectionrec::WAIT_TUP; /* ------------------------------------------------------------------------ * IT IS NOW TIME TO CONTACT THE TUPLE MANAGER. THE TUPLE MANAGER NEEDS THE @@ -3856,17 +4361,18 @@ Dblqh::acckeyconf_tupkeyreq(Signal* signal, TcConnectionrec* regTcPtr, * IS NEEDED SINCE TWO SCHEMA VERSIONS CAN BE ACTIVE SIMULTANEOUSLY ON A * TABLE. 
* ----------------------------------------------------------------------- */ - Uint32 localKey2 = local_key & MAX_TUPLES_PER_PAGE; - Uint32 localKey1 = local_key >> MAX_TUPLES_BITS; + Uint32 page_idx = local_key & MAX_TUPLES_PER_PAGE; + Uint32 page_no = local_key >> MAX_TUPLES_BITS; #ifdef TRACE_LQHKEYREQ - ndbout << "localkey: [ " << hex << localKey1 << " " << localKey2 << "]" + ndbout << "localkey: [ " << hex << page_no << " " << page_idx << "]" << endl; #endif Uint32 Ttupreq = regTcPtr->dirtyOp; Ttupreq = Ttupreq + (regTcPtr->opSimple << 1); - Ttupreq = Ttupreq + (regTcPtr->operation << 6); + Ttupreq = Ttupreq + (op << 6); Ttupreq = Ttupreq + (regTcPtr->opExec << 10); Ttupreq = Ttupreq + (regTcPtr->apiVersionNo << 11); + Ttupreq = Ttupreq + (regTcPtr->m_use_rowid << 11); /* --------------------------------------------------------------------- * Clear interpreted mode bit since we do not want the next replica to @@ -3882,8 +4388,8 @@ Dblqh::acckeyconf_tupkeyreq(Signal* signal, TcConnectionrec* regTcPtr, TupKeyReq * const tupKeyReq = (TupKeyReq *)signal->getDataPtrSend(); tupKeyReq->connectPtr = sig0; tupKeyReq->request = Ttupreq; - tupKeyReq->keyRef1 = localKey1; - tupKeyReq->keyRef2 = localKey2; + tupKeyReq->keyRef1 = page_no; + tupKeyReq->keyRef2 = page_idx; sig0 = regTcPtr->totReclenAi; sig1 = regTcPtr->applOprec; @@ -3903,13 +4409,83 @@ Dblqh::acckeyconf_tupkeyreq(Signal* signal, TcConnectionrec* regTcPtr, tupKeyReq->transId1 = sig1; tupKeyReq->transId2 = sig2; tupKeyReq->fragPtr = sig3; + + sig0 = regTcPtr->m_row_id.m_page_no; + sig1 = regTcPtr->m_row_id.m_page_idx; + tupKeyReq->primaryReplica = (tcConnectptr.p->seqNoReplica == 0)?true:false; tupKeyReq->coordinatorTC = tcConnectptr.p->tcBlockref; tupKeyReq->tcOpIndex = tcConnectptr.p->tcOprec; tupKeyReq->savePointId = tcConnectptr.p->savePointId; tupKeyReq->disk_page= disk_page; + tupKeyReq->m_row_id_page_no = sig0; + tupKeyReq->m_row_id_page_idx = sig1; + + if (ERROR_INSERTED(5712) && regTcPtr->operation == 
ZINSERT) + { + ndbout << "INSERT " << regFragptrP->tabRef + << "(" << regFragptrP->fragId << ")"; + + { + ndbout << "key=[" << hex; + Uint32 i; + for(i = 0; i<regTcPtr->primKeyLen && i < 4; i++){ + ndbout << hex << regTcPtr->tupkeyData[i] << " "; + } + + DatabufPtr regDatabufptr; + regDatabufptr.i = regTcPtr->firstTupkeybuf; + while(i < regTcPtr->primKeyLen) + { + ptrCheckGuard(regDatabufptr, cdatabufFileSize, databuf); + for(Uint32 j = 0; j<4 && i<regTcPtr->primKeyLen; j++, i++) + ndbout << hex << regDatabufptr.p->data[j] << " "; + } + ndbout << "] "; + } + + if(regTcPtr->m_use_rowid) + ndbout << " " << regTcPtr->m_row_id; + } + + if (ERROR_INSERTED(5712) && regTcPtr->operation == ZDELETE) + { + Local_key lk; lk.assref(local_key); + + ndbout << "DELETE " << regFragptrP->tabRef + << "(" << regFragptrP->fragId << ") " << lk; + + { + ndbout << "key=[" << hex; + Uint32 i; + for(i = 0; i<regTcPtr->primKeyLen && i < 4; i++){ + ndbout << hex << regTcPtr->tupkeyData[i] << " "; + } + + DatabufPtr regDatabufptr; + regDatabufptr.i = regTcPtr->firstTupkeybuf; + while(i < regTcPtr->primKeyLen) + { + ptrCheckGuard(regDatabufptr, cdatabufFileSize, databuf); + for(Uint32 j = 0; j<4 && i<regTcPtr->primKeyLen; j++, i++) + ndbout << hex << regDatabufptr.p->data[j] << " "; + } + ndbout << "]" << endl; + } + + } + + regTcPtr->m_use_rowid |= (op == ZINSERT); + regTcPtr->m_row_id.m_page_no = page_no; + regTcPtr->m_row_id.m_page_idx = page_idx; + EXECUTE_DIRECT(tup, GSN_TUPKEYREQ, signal, TupKeyReq::SignalLength); + + if (ERROR_INSERTED(5712) && regTcPtr->operation == ZINSERT) + { + ndbout << endl; + } }//Dblqh::execACCKEYCONF() void @@ -3928,7 +4504,7 @@ Dblqh::acckeyconf_load_diskpage(Signal* signal, TcConnectionrecPtr regTcPtr, else if(res == 0) { regTcPtr.p->transactionState = TcConnectionrec::WAIT_TUP; - regTcPtr.p->m_local_key = local_key; + regTcPtr.p->m_row_id.assref(local_key); } else { @@ -3956,7 +4532,7 @@ Dblqh::acckeyconf_load_diskpage_callback(Signal* signal, 
c_fragment_pool.getPtr(fragPtr, regTcPtr->fragmentptr); acckeyconf_tupkeyreq(signal, regTcPtr, fragPtr.p, - regTcPtr->m_local_key, + regTcPtr->m_row_id.ref(), disk_page); } else if (state != TcConnectionrec::WAIT_TUP) @@ -3999,7 +4575,6 @@ void Dblqh::tupkeyConfLab(Signal* signal) * WE CAN GO IMMEDIATELY TO COMMIT_CONTINUE_AFTER_BLOCKED. * WE HAVE ALREADY SENT THE RESPONSE SO WE ARE NOT INTERESTED IN READ LENGTH * ---------------------------------------------------------------------- */ - regTcPtr->gci = cnewestGci; commitContinueAfterBlockedLab(signal); return; }//if @@ -4045,7 +4620,6 @@ void Dblqh::rwConcludedLab(Signal* signal) * THIS OPERATION WAS A WRITE OPERATION THAT DO NOT NEED LOGGING AND * THAT CAN CAN BE COMMITTED IMMEDIATELY. * ----------------------------------------------------------------- */ - regTcPtr->gci = cnewestGci; commitContinueAfterBlockedLab(signal); return; } else { @@ -4092,7 +4666,6 @@ void Dblqh::rwConcludedAiLab(Signal* signal) * THE OPERATION IS A SIMPLE READ. WE WILL IMMEDIATELY COMMIT THE * OPERATION. * -------------------------------------------------------------------- */ - regTcPtr->gci = cnewestGci; localCommitLab(signal); return; } else { @@ -4113,12 +4686,11 @@ void Dblqh::rwConcludedAiLab(Signal* signal) /* ------------------------------------------------------------------ * THIS OPERATION WAS A WRITE OPERATION THAT DO NOT NEED LOGGING AND * THAT CAN CAN BE COMMITTED IMMEDIATELY. - * ------------------------------------------------------------------ */ + * ----------------------------------------------------------------- */ jam(); - /* ---------------------------------------------------------------- - * IT MUST BE ACTIVE CREATION OF A FRAGMENT. - * ---------------------------------------------------------------- */ - regTcPtr->gci = cnewestGci; + /* ---------------------------------------------------------------- + * IT MUST BE ACTIVE CREATION OF A FRAGMENT. 
+ * ---------------------------------------------------------------- */ localCommitLab(signal); return; } else { @@ -4179,7 +4751,7 @@ void Dblqh::logLqhkeyreqLab(Signal* signal) return; }//if TcConnectionrec * const regTcPtr = tcConnectptr.p; - logPartPtr.i = regTcPtr->hashValue & 3; + logPartPtr.i = regTcPtr->m_log_part_ptr_i; ptrCheckGuard(logPartPtr, clogPartFileSize, logPartRecord); /* -------------------------------------------------- */ /* THIS PART IS USED TO WRITE THE LOG */ @@ -4344,7 +4916,6 @@ void Dblqh::logLqhkeyreqLab(Signal* signal) /* ---------------------------------------------------------------------- * DIRTY OPERATIONS SHOULD COMMIT BEFORE THEY PACK THE REQUEST/RESPONSE. * ---------------------------------------------------------------------- */ - regTcPtr->gci = cnewestGci; localCommitLab(signal); }//if }//Dblqh::logLqhkeyreqLab() @@ -4415,11 +4986,35 @@ void Dblqh::packLqhkeyreqLab(Signal* signal) UintR sig0, sig1, sig2, sig3, sig4, sig5, sig6; Treqinfo = preComputedRequestInfoMask & regTcPtr->reqinfo; + Uint32 nextNodeId = regTcPtr->nextReplica; + Uint32 nextVersion = getNodeInfo(nextNodeId).m_version; + UintR TapplAddressIndicator = (regTcPtr->nextSeqNoReplica == 0 ? 0 : 1); LqhKeyReq::setApplicationAddressFlag(Treqinfo, TapplAddressIndicator); LqhKeyReq::setInterpretedFlag(Treqinfo, regTcPtr->opExec); LqhKeyReq::setSeqNoReplica(Treqinfo, regTcPtr->nextSeqNoReplica); LqhKeyReq::setAIInLqhKeyReq(Treqinfo, regTcPtr->reclenAiLqhkey); + + if (unlikely(nextVersion < NDBD_ROWID_VERSION)) + { + LqhKeyReq::setLockType(Treqinfo, regTcPtr->lockType); + } + else + { + regTcPtr->m_use_rowid |= + fragptr.p->m_copy_started_state == Fragrecord::AC_NR_COPY; + LqhKeyReq::setRowidFlag(Treqinfo, regTcPtr->m_use_rowid); + } + + if (LqhKeyReq::getRowidFlag(Treqinfo)) + { + //ndbassert(LqhKeyReq::getOperation(Treqinfo) == ZINSERT); + } + else + { + ndbassert(LqhKeyReq::getOperation(Treqinfo) != ZINSERT); + } + UintR TreadLenAiInd = (regTcPtr->readlenAi == 0 ? 
0 : 1); UintR TsameLqhAndClient = (tcConnectptr.i == regTcPtr->tcOprec ? 0 : 1); @@ -4491,6 +5086,16 @@ void Dblqh::packLqhkeyreqLab(Signal* signal) nextPos += 4; }//if + sig0 = regTcPtr->gci; + Local_key tmp = regTcPtr->m_row_id; + + lqhKeyReq->variableData[nextPos + 0] = tmp.m_page_no; + lqhKeyReq->variableData[nextPos + 1] = tmp.m_page_idx; + nextPos += 2*LqhKeyReq::getRowidFlag(Treqinfo); + + lqhKeyReq->variableData[nextPos + 0] = sig0; + nextPos += LqhKeyReq::getGCIFlag(Treqinfo); + sig0 = regTcPtr->firstAttrinfo[0]; sig1 = regTcPtr->firstAttrinfo[1]; sig2 = regTcPtr->firstAttrinfo[2]; @@ -4636,7 +5241,9 @@ void Dblqh::writeLogHeader(Signal* signal) Uint32 operation = tcConnectptr.p->operation; Uint32 keyLen = tcConnectptr.p->primKeyLen; Uint32 aiLen = tcConnectptr.p->currTupAiLen; - Uint32 totLogLen = aiLen + keyLen + ZLOG_HEAD_SIZE; + Local_key rowid = tcConnectptr.p->m_row_id; + Uint32 totLogLen = ZLOG_HEAD_SIZE + aiLen + keyLen; + if ((logPos + ZLOG_HEAD_SIZE) < ZPAGE_SIZE) { Uint32* dataPtr = &logPagePtr.p->logPageWord[logPos]; logPagePtr.p->logPageWord[ZCURR_PAGE_INDEX] = logPos + ZLOG_HEAD_SIZE; @@ -4646,6 +5253,8 @@ void Dblqh::writeLogHeader(Signal* signal) dataPtr[3] = operation; dataPtr[4] = aiLen; dataPtr[5] = keyLen; + dataPtr[6] = rowid.m_page_no; + dataPtr[7] = rowid.m_page_idx; } else { writeLogWord(signal, ZPREP_OP_TYPE); writeLogWord(signal, totLogLen); @@ -4653,6 +5262,8 @@ void Dblqh::writeLogHeader(Signal* signal) writeLogWord(signal, operation); writeLogWord(signal, aiLen); writeLogWord(signal, keyLen); + writeLogWord(signal, rowid.m_page_no); + writeLogWord(signal, rowid.m_page_idx); }//if }//Dblqh::writeLogHeader() @@ -4840,6 +5451,22 @@ void Dblqh::releaseOprec(Signal* signal) regTcPtr->lastAttrinbuf = RNIL; regTcPtr->firstTupkeybuf = RNIL; regTcPtr->lastTupkeybuf = RNIL; + + if (regTcPtr->m_dealloc) + { + jam(); + regTcPtr->m_dealloc = 0; + + if (TRACENR_FLAG) + TRACENR("DELETED: " << regTcPtr->m_row_id << endl); + + 
signal->theData[0] = regTcPtr->fragmentid; + signal->theData[1] = regTcPtr->tableref; + signal->theData[2] = regTcPtr->m_row_id.m_page_no; + signal->theData[3] = regTcPtr->m_row_id.m_page_idx; + signal->theData[4] = RNIL; + EXECUTE_DIRECT(DBTUP, GSN_TUP_DEALLOCREQ, signal, 5); + } }//Dblqh::releaseOprec() /* ------------------------------------------------------------------------- */ @@ -5169,15 +5796,24 @@ void Dblqh::execCOMPLETE(Signal* signal) if ((tcConnectptr.p->transactionState == TcConnectionrec::COMMITTED) && (tcConnectptr.p->transid[0] == transid1) && (tcConnectptr.p->transid[1] == transid2)) { - if (tcConnectptr.p->seqNoReplica != 0) { + if (tcConnectptr.p->seqNoReplica != 0 && + tcConnectptr.p->activeCreat == Fragrecord::AC_NORMAL) { jam(); localCommitLab(signal); return; - } else { + } + else if (tcConnectptr.p->seqNoReplica == 0) + { jam(); completeTransLastLab(signal); return; - }//if + } + else + { + jam(); + completeTransNotLastLab(signal); + return; + } }//if if (tcConnectptr.p->transactionState != TcConnectionrec::COMMITTED) { warningReport(signal, 2); @@ -5255,15 +5891,21 @@ void Dblqh::execCOMPLETEREQ(Signal* signal) return; break; }//switch - if (regTcPtr->seqNoReplica != 0) { + if (regTcPtr->seqNoReplica != 0 && + regTcPtr->activeCreat != Fragrecord::AC_NR_COPY) { jam(); localCommitLab(signal); - return; - } else { + } + else if (regTcPtr->seqNoReplica == 0) + { jam(); completeTransLastLab(signal); - return; - }//if + } + else + { + jam(); + completeTransNotLastLab(signal); + } }//Dblqh::execCOMPLETEREQ() /* ************> */ @@ -5310,7 +5952,6 @@ void Dblqh::commitReqLab(Signal* signal, Uint32 gci) TcConnectionrec * const regTcPtr = tcConnectptr.p; TcConnectionrec::LogWriteState logWriteState = regTcPtr->logWriteState; TcConnectionrec::TransactionState transState = regTcPtr->transactionState; - ndbrequire(regTcPtr->gci == gci || regTcPtr->gci == 0); regTcPtr->gci = gci; if (transState == TcConnectionrec::PREPARED) { if (logWriteState == 
TcConnectionrec::WRITTEN) { @@ -5361,12 +6002,13 @@ void Dblqh::commitReqLab(Signal* signal, Uint32 gci) warningReport(signal, 0); return; }//if - if (regTcPtr->seqNoReplica != 0) { + if (regTcPtr->seqNoReplica == 0 || + regTcPtr->activeCreat == Fragrecord::AC_NR_COPY) { jam(); - commitReplyLab(signal); + localCommitLab(signal); return; }//if - localCommitLab(signal); + commitReplyLab(signal); return; }//Dblqh::commitReqLab() @@ -5380,7 +6022,6 @@ void Dblqh::execLQH_WRITELOG_REQ(Signal* signal) Uint32 newestGci = cnewestGci; TcConnectionrec::LogWriteState logWriteState = regTcPtr->logWriteState; TcConnectionrec::TransactionState transState = regTcPtr->transactionState; - ndbrequire(regTcPtr->gci == gci || regTcPtr->gci == 0); regTcPtr->gci = gci; if (gci > newestGci) { jam(); @@ -5398,7 +6039,7 @@ void Dblqh::execLQH_WRITELOG_REQ(Signal* signal) LogPartRecordPtr regLogPartPtr; Uint32 noOfLogPages = cnoOfLogPages; jam(); - regLogPartPtr.i = regTcPtr->hashValue & 3; + regLogPartPtr.i = regTcPtr->m_log_part_ptr_i; ptrCheckGuard(regLogPartPtr, clogPartFileSize, logPartRecord); if ((regLogPartPtr.p->logPartState == LogPartRecord::ACTIVE) || (noOfLogPages == 0)) { @@ -5485,62 +6126,60 @@ void Dblqh::commitContinueAfterBlockedLab(Signal* signal) /*WE MUST COMMIT TUP BEFORE ACC TO ENSURE THAT NO ONE RACES IN AND SEES A */ /*DIRTY STATE IN TUP. */ /* ------------------------------------------------------------------------- */ - TcConnectionrec * const regTcPtr = tcConnectptr.p; - Fragrecord * const regFragptr = fragptr.p; - Uint32 operation = regTcPtr->operation; - Uint32 simpleRead = regTcPtr->simpleRead; - Uint32 dirtyOp = regTcPtr->dirtyOp; - if (regTcPtr->activeCreat == ZFALSE) { - if ((cCommitBlocked == true) && - (regFragptr->fragActiveStatus == ZTRUE)) { - jam(); -/* ------------------------------------------------------------------------- */ -// TUP and/or ACC have problems in writing the undo log to disk fast enough. 
-// We must avoid the commit at this time and try later instead. The fragment -// is also active with a local checkpoint and this commit can generate UNDO -// log records that overflow the UNDO log buffer. -/* ------------------------------------------------------------------------- */ -/*---------------------------------------------------------------------------*/ -// We must delay the write of commit info to the log to safe-guard against -// a crash due to lack of log pages. We temporary stop all log writes to this -// log part to ensure that we don't get a buffer explosion in the delayed -// signal buffer instead. -/*---------------------------------------------------------------------------*/ - logPartPtr.i = regTcPtr->hashValue & 3; - ptrCheckGuard(logPartPtr, clogPartFileSize, logPartRecord); - linkWaitLog(signal, logPartPtr); - regTcPtr->transactionState = TcConnectionrec::COMMIT_QUEUED; - if (logPartPtr.p->logPartState == LogPartRecord::IDLE) { - jam(); - logPartPtr.p->logPartState = LogPartRecord::ACTIVE; - }//if - return; - }//if + Ptr<TcConnectionrec> regTcPtr = tcConnectptr; + Ptr<Fragrecord> regFragptr = fragptr; + Uint32 operation = regTcPtr.p->operation; + Uint32 simpleRead = regTcPtr.p->simpleRead; + Uint32 dirtyOp = regTcPtr.p->dirtyOp; + if (regTcPtr.p->activeCreat != Fragrecord::AC_IGNORED) { if (operation != ZREAD) { TupCommitReq * const tupCommitReq = (TupCommitReq *)signal->getDataPtrSend(); - Uint32 sig0 = regTcPtr->tupConnectrec; - Uint32 tup = refToBlock(regTcPtr->tcTupBlockref); + Uint32 sig0 = regTcPtr.p->tupConnectrec; + Uint32 tup = refToBlock(regTcPtr.p->tcTupBlockref); jam(); tupCommitReq->opPtr = sig0; - tupCommitReq->gci = regTcPtr->gci; - tupCommitReq->hashValue = regTcPtr->hashValue; + tupCommitReq->gci = regTcPtr.p->gci; + tupCommitReq->hashValue = regTcPtr.p->hashValue; EXECUTE_DIRECT(tup, GSN_TUP_COMMITREQ, signal, TupCommitReq::SignalLength); if(signal->theData[0] != 0) { - regTcPtr->transactionState = 
TcConnectionrec::WAIT_TUP_COMMIT; + regTcPtr.p->transactionState = TcConnectionrec::WAIT_TUP_COMMIT; return; // TUP_COMMIT was timesliced } - Uint32 acc = refToBlock(regTcPtr->tcAccBlockref); - signal->theData[0] = regTcPtr->accConnectrec; + if (TRACENR_FLAG) + { + TRACENR("COMMIT: "); + switch (regTcPtr.p->operation) { + case ZREAD: TRACENR("READ"); break; + case ZUPDATE: TRACENR("UPDATE"); break; + case ZWRITE: TRACENR("WRITE"); break; + case ZINSERT: TRACENR("INSERT"); break; + case ZDELETE: TRACENR("DELETE"); break; + } + + TRACENR(" tab: " << regTcPtr.p->tableref + << " frag: " << regTcPtr.p->fragmentid + << " activeCreat: " << (Uint32)regTcPtr.p->activeCreat); + if (LqhKeyReq::getNrCopyFlag(regTcPtr.p->reqinfo)) + TRACENR(" NrCopy"); + if (LqhKeyReq::getRowidFlag(regTcPtr.p->reqinfo)) + TRACENR(" rowid: " << regTcPtr.p->m_row_id); + TRACENR(" key: " << regTcPtr.p->tupkeyData[0]); + TRACENR(endl); + } + + Uint32 acc = refToBlock(regTcPtr.p->tcAccBlockref); + signal->theData[0] = regTcPtr.p->accConnectrec; EXECUTE_DIRECT(acc, GSN_ACC_COMMITREQ, signal, 1); + } else { if(!dirtyOp){ - Uint32 acc = refToBlock(regTcPtr->tcAccBlockref); - signal->theData[0] = regTcPtr->accConnectrec; + Uint32 acc = refToBlock(regTcPtr.p->tcAccBlockref); + signal->theData[0] = regTcPtr.p->accConnectrec; EXECUTE_DIRECT(acc, GSN_ACC_COMMITREQ, signal, 1); } @@ -5553,13 +6192,17 @@ void Dblqh::commitContinueAfterBlockedLab(Signal* signal) /*RESOURCES BELONGING TO THIS OPERATION SINCE NO MORE WORK WILL BE */ /*PERFORMED. 
*/ /* ------------------------------------------------------------------------- */ + fragptr = regFragptr; + tcConnectptr = regTcPtr; cleanUp(signal); return; }//if } }//if jamEntry(); - tupcommit_conf(signal, regTcPtr, regFragptr); + fragptr = regFragptr; + tcConnectptr = regTcPtr; + tupcommit_conf(signal, regTcPtr.p, regFragptr.p); } void @@ -5569,7 +6212,7 @@ Dblqh::tupcommit_conf_callback(Signal* signal, Uint32 tcPtrI) tcConnectptr.i = tcPtrI; ptrCheckGuard(tcConnectptr, ctcConnectrecFileSize, tcConnectionrec); - TcConnectionrec * const tcPtr = tcConnectptr.p; + TcConnectionrec * tcPtr = tcConnectptr.p; ndbrequire(tcPtr->transactionState == TcConnectionrec::WAIT_TUP_COMMIT); @@ -5583,31 +6226,45 @@ Dblqh::tupcommit_conf_callback(Signal* signal, Uint32 tcPtrI) EXECUTE_DIRECT(acc, GSN_ACC_COMMITREQ, signal, 1); jamEntry(); + tcConnectptr.i = tcPtrI; + tcConnectptr.p = tcPtr; tupcommit_conf(signal, tcPtr, regFragptr.p); } void Dblqh::tupcommit_conf(Signal* signal, - TcConnectionrec * regTcPtr, + TcConnectionrec * tcPtrP, Fragrecord * regFragptr) { - Uint32 dirtyOp = regTcPtr->dirtyOp; - Uint32 seqNoReplica = regTcPtr->seqNoReplica; - if (regTcPtr->gci > regFragptr->newestGci) { + Uint32 dirtyOp = tcPtrP->dirtyOp; + Uint32 seqNoReplica = tcPtrP->seqNoReplica; + Uint32 activeCreat = tcPtrP->activeCreat; + if (tcPtrP->gci > regFragptr->newestGci) { jam(); /* ------------------------------------------------------------------------- */ /*IT IS THE FIRST TIME THIS GLOBAL CHECKPOINT IS INVOLVED IN UPDATING THIS */ /*FRAGMENT. 
UPDATE THE VARIABLE THAT KEEPS TRACK OF NEWEST GCI IN FRAGMENT */ /* ------------------------------------------------------------------------- */ - regFragptr->newestGci = regTcPtr->gci; + regFragptr->newestGci = tcPtrP->gci; }//if - if (dirtyOp != ZTRUE) { - if (seqNoReplica != 0) { + if (dirtyOp != ZTRUE) + { + if (seqNoReplica == 0 || activeCreat == Fragrecord::AC_NR_COPY) + { jam(); - completeTransNotLastLab(signal); + commitReplyLab(signal); return; }//if - commitReplyLab(signal); + if (seqNoReplica == 0) + { + jam(); + completeTransLastLab(signal); + } + else + { + jam(); + completeTransNotLastLab(signal); + } return; } else { /* ------------------------------------------------------------------------- */ @@ -5615,11 +6272,28 @@ Dblqh::tupcommit_conf(Signal* signal, /*SEND ANY COMMIT OR COMPLETE MESSAGES TO OTHER NODES. THEY WILL MERELY SEND */ /*THOSE SIGNALS INTERNALLY. */ /* ------------------------------------------------------------------------- */ - if (regTcPtr->abortState == TcConnectionrec::ABORT_IDLE) { + if (tcPtrP->abortState == TcConnectionrec::ABORT_IDLE) { jam(); + if (activeCreat == Fragrecord::AC_NR_COPY && + tcPtrP->m_nr_delete.m_cnt > 1) + { + jam(); + /** + * Nr delete waiting for disk delete to complete... 
+ */ +#ifdef VM_TRACE + TablerecPtr tablePtr; + tablePtr.i = tcPtrP->tableref; + ptrCheckGuard(tablePtr, ctabrecFileSize, tablerec); + ndbrequire(tablePtr.p->m_disk_table); +#endif + tcPtrP->m_nr_delete.m_cnt--; + tcPtrP->transactionState = TcConnectionrec::WAIT_TUP_COMMIT; + return; + } packLqhkeyreqLab(signal); } else { - ndbrequire(regTcPtr->abortState != TcConnectionrec::NEW_FROM_TC); + ndbrequire(tcPtrP->abortState != TcConnectionrec::NEW_FROM_TC); jam(); sendLqhTransconf(signal, LqhTransConf::Committed); cleanUp(signal); @@ -5847,7 +6521,7 @@ void Dblqh::execABORT(Signal* signal) sendSignal(TLqhRef, GSN_ABORT, signal, 4, JBB); }//if regTcPtr->abortState = TcConnectionrec::ABORT_FROM_TC; - regTcPtr->activeCreat = ZFALSE; + regTcPtr->activeCreat = Fragrecord::AC_NORMAL; const Uint32 commitAckMarker = regTcPtr->commitAckMarker; if(commitAckMarker != RNIL){ @@ -5907,7 +6581,7 @@ void Dblqh::execABORTREQ(Signal* signal) regTcPtr->reqBlockref = reqBlockref; regTcPtr->reqRef = reqPtr; regTcPtr->abortState = TcConnectionrec::REQ_FROM_TC; - regTcPtr->activeCreat = ZFALSE; + regTcPtr->activeCreat = Fragrecord::AC_NORMAL; abortCommonLab(signal); return; }//Dblqh::execABORTREQ() @@ -5952,51 +6626,49 @@ void Dblqh::execACCKEYREF(Signal* signal) }//switch const Uint32 errCode = terrorCode; tcPtr->errorCode = errCode; -/* ------------------------------------------------------------------------- */ -/*WHEN AN ABORT FROM TC ARRIVES IT COULD ACTUALLY BE A CORRECT BEHAVIOUR */ -/*SINCE THE TUPLE MIGHT NOT HAVE ARRIVED YET OR ALREADY HAVE BEEN INSERTED. */ -/* ------------------------------------------------------------------------- */ - if (tcPtr->activeCreat == ZTRUE) { - jam(); -/* ------------------------------------------------------------------------- */ -/*THIS IS A NORMAL EVENT DURING CREATION OF A FRAGMENT. PERFORM ABORT IN */ -/*TUP AND ACC AND THEN CONTINUE WITH NORMAL COMMIT PROCESSING. 
IF THE ERROR */ -/*HAPPENS TO BE A SERIOUS ERROR THEN PERFORM ABORT PROCESSING AS NORMAL. */ -/* ------------------------------------------------------------------------- */ + + if (TRACENR_FLAG) + { + TRACENR("ACCKEYREF: " << errCode << " "); switch (tcPtr->operation) { - case ZUPDATE: - case ZDELETE: - jam(); - if (errCode != ZNO_TUPLE_FOUND) { - jam(); -/* ------------------------------------------------------------------------- */ -/*A NORMAL ERROR WILL BE TREATED AS A NORMAL ABORT AND WILL ABORT THE */ -/*TRANSACTION. NO SPECIAL HANDLING IS NEEDED. */ -/* ------------------------------------------------------------------------- */ - tcPtr->activeCreat = ZFALSE; - }//if + case ZREAD: TRACENR("READ"); break; + case ZUPDATE: TRACENR("UPDATE"); break; + case ZWRITE: TRACENR("WRITE"); break; + case ZINSERT: TRACENR("INSERT"); break; + case ZDELETE: TRACENR("DELETE"); break; + default: TRACENR("<Unknown: " << tcPtr->operation << ">"); break; + } + + TRACENR(" tab: " << tcPtr->tableref + << " frag: " << tcPtr->fragmentid + << " activeCreat: " << (Uint32)tcPtr->activeCreat); + if (LqhKeyReq::getNrCopyFlag(tcPtr->reqinfo)) + TRACENR(" NrCopy"); + if (LqhKeyReq::getRowidFlag(tcPtr->reqinfo)) + TRACENR(" rowid: " << tcPtr->m_row_id); + TRACENR(" key: " << tcPtr->tupkeyData[0]); + TRACENR(endl); + + } + + if (tcPtr->activeCreat == Fragrecord::AC_NR_COPY) + { + jam(); + Uint32 op = tcPtr->operation; + switch(errCode){ + case ZNO_TUPLE_FOUND: + ndbrequire(op == ZDELETE); break; - case ZINSERT: - jam(); - if (errCode != ZTUPLE_ALREADY_EXIST) { - jam(); -/* ------------------------------------------------------------------------- */ -/*A NORMAL ERROR WILL BE TREATED AS A NORMAL ABORT AND WILL ABORT THE */ -/*TRANSACTION. NO SPECIAL HANDLING IS NEEDED. 
*/ -/* ------------------------------------------------------------------------- */ - tcPtr->activeCreat = ZFALSE; - }//if break; default: - jam(); -/* ------------------------------------------------------------------------- */ -/*A NORMAL ERROR WILL BE TREATED AS A NORMAL ABORT AND WILL ABORT THE */ -/*TRANSACTION. NO SPECIAL HANDLING IS NEEDED. */ -/* ------------------------------------------------------------------------- */ - tcPtr->activeCreat = ZFALSE; - break; - }//switch - } else { + ndbrequire(false); + } + tcPtr->activeCreat = Fragrecord::AC_IGNORED; + } + else + { + ndbrequire(!LqhKeyReq::getNrCopyFlag(tcPtr->reqinfo)); + /** * Only primary replica can get ZTUPLE_ALREADY_EXIST || ZNO_TUPLE_FOUND * @@ -6010,11 +6682,16 @@ void Dblqh::execACCKEYREF(Signal* signal) * * -> ZNO_TUPLE_FOUND is possible */ + ndbrequire(tcPtr->operation == ZREAD + || tcPtr->operation == ZREAD_EX + || tcPtr->seqNoReplica == 0); + ndbrequire (tcPtr->seqNoReplica == 0 || errCode != ZTUPLE_ALREADY_EXIST || (tcPtr->operation == ZREAD && (tcPtr->dirtyOp || tcPtr->opSimple))); } + tcPtr->abortState = TcConnectionrec::ABORT_FROM_LQH; abortCommonLab(signal); return; @@ -6027,7 +6704,7 @@ void Dblqh::localAbortStateHandlerLab(Signal* signal) jam(); return; }//if - regTcPtr->activeCreat = ZFALSE; + regTcPtr->activeCreat = Fragrecord::AC_NORMAL; regTcPtr->abortState = TcConnectionrec::ABORT_FROM_LQH; regTcPtr->errorCode = terrorCode; abortStateHandlerLab(signal); @@ -6207,7 +6884,7 @@ void Dblqh::abortErrorLab(Signal* signal) * ACTIVE CREATION IS RESET FOR ALL ERRORS WHICH SHOULD BE HANDLED * WITH NORMAL ABORT HANDLING. 
* ----------------------------------------------------------------------- */ - regTcPtr->activeCreat = ZFALSE; + regTcPtr->activeCreat = Fragrecord::AC_NORMAL; abortCommonLab(signal); return; }//Dblqh::abortErrorLab() @@ -6216,7 +6893,8 @@ void Dblqh::abortCommonLab(Signal* signal) { TcConnectionrec * const regTcPtr = tcConnectptr.p; const Uint32 commitAckMarker = regTcPtr->commitAckMarker; - if(regTcPtr->activeCreat != ZTRUE && commitAckMarker != RNIL){ + if(regTcPtr->activeCreat != Fragrecord::AC_IGNORED && + commitAckMarker != RNIL){ /** * There is no NR ongoing and we have a marker */ @@ -6281,42 +6959,12 @@ void Dblqh::abortContinueAfterBlockedLab(Signal* signal, bool canBlock) TcConnectionrec * const regTcPtr = tcConnectptr.p; fragptr.i = regTcPtr->fragmentptr; c_fragment_pool.getPtr(fragptr); - if ((cCommitBlocked == true) && - (fragptr.p->fragActiveStatus == ZTRUE) && - (canBlock == true) && - (regTcPtr->operation != ZREAD)) { - jam(); -/* ------------------------------------------------------------------------- */ -// TUP and/or ACC have problems in writing the undo log to disk fast enough. -// We must avoid the abort at this time and try later instead. The fragment -// is also active with a local checkpoint and this commit can generate UNDO -// log records that overflow the UNDO log buffer. -// -// In certain situations it is simply too complex to insert a wait state here -// since ACC is active and we cannot release the operation from the active -// list without causing great complexity. -/* ------------------------------------------------------------------------- */ -/*---------------------------------------------------------------------------*/ -// We must delay the write of abort info to the log to safe-guard against -// a crash due to lack of log pages. We temporary stop all log writes to this -// log part to ensure that we don't get a buffer explosion in the delayed -// signal buffer instead. 
-/*---------------------------------------------------------------------------*/ - logPartPtr.i = regTcPtr->hashValue & 3; - ptrCheckGuard(logPartPtr, clogPartFileSize, logPartRecord); - linkWaitLog(signal, logPartPtr); - regTcPtr->transactionState = TcConnectionrec::ABORT_QUEUED; - if (logPartPtr.p->logPartState == LogPartRecord::IDLE) { - jam(); - logPartPtr.p->logPartState = LogPartRecord::ACTIVE; - }//if - return; - }//if signal->theData[0] = regTcPtr->tupConnectrec; EXECUTE_DIRECT(DBTUP, GSN_TUP_ABORTREQ, signal, 1); regTcPtr->transactionState = TcConnectionrec::WAIT_ACC_ABORT; signal->theData[0] = regTcPtr->accConnectrec; - EXECUTE_DIRECT(DBACC, GSN_ACC_ABORTREQ, signal, 1); + signal->theData[1] = true; + EXECUTE_DIRECT(DBACC, GSN_ACC_ABORTREQ, signal, 2); /* ------------------------------------------------------------------------ * We need to insert a real-time break by sending ACC_ABORTCONF through the * job buffer to ensure that we catch any ACCKEYCONF or TUPKEYCONF or @@ -6337,11 +6985,11 @@ void Dblqh::execACC_ABORTCONF(Signal* signal) ptrCheckGuard(tcConnectptr, ctcConnectrecFileSize, tcConnectionrec); TcConnectionrec * const regTcPtr = tcConnectptr.p; ndbrequire(regTcPtr->transactionState == TcConnectionrec::WAIT_ACC_ABORT); - if (regTcPtr->activeCreat == ZTRUE) { + if (regTcPtr->activeCreat == Fragrecord::AC_IGNORED) { /* ---------------------------------------------------------------------- * A NORMAL EVENT DURING CREATION OF A FRAGMENT. WE NOW NEED TO CONTINUE * WITH NORMAL COMMIT PROCESSING. - * ---------------------------------------------------------------------- */ + * --------------------------------------------------------------------- */ if (regTcPtr->currTupAiLen == regTcPtr->totReclenAi) { jam(); regTcPtr->abortState = TcConnectionrec::ABORT_IDLE; @@ -6657,6 +7305,7 @@ void Dblqh::lqhTransNextLab(Signal* signal) * THE RECEIVER OF THE COPY HAVE FAILED. * WE HAVE TO CLOSE THE COPY PROCESS. 
* ----------------------------------------------------------- */ + ndbout_c("close copy"); tcConnectptr.p->tcNodeFailrec = tcNodeFailptr.i; tcConnectptr.p->abortState = TcConnectionrec::NEW_FROM_TC; closeCopyRequestLab(signal); @@ -6814,13 +7463,28 @@ void Dblqh::execNEXT_SCANCONF(Signal* signal) jamEntry(); scanptr.i = nextScanConf->scanPtr; c_scanRecordPool.getPtr(scanptr); - if (nextScanConf->localKeyLength == 1) { + if (likely(nextScanConf->localKeyLength == 1)) + { jam(); - nextScanConf->localKey[1] = - nextScanConf->localKey[0] & MAX_TUPLES_PER_PAGE; - nextScanConf->localKey[0] = nextScanConf->localKey[0] >> MAX_TUPLES_BITS; - }//if - + scanptr.p->m_row_id.assref(nextScanConf->localKey[0]); + } + else + { + jam(); + scanptr.p->m_row_id.m_page_no = nextScanConf->localKey[0]; + scanptr.p->m_row_id.m_page_idx = nextScanConf->localKey[1]; + } + +#ifdef VM_TRACE + if (signal->getLength() > 2 && nextScanConf->accOperationPtr != RNIL) + { + Ptr<TcConnectionrec> regTcPtr; + regTcPtr.i = scanptr.p->scanTcrec; + ptrCheckGuard(regTcPtr, ctcConnectrecFileSize, tcConnectionrec); + ndbassert(regTcPtr.p->fragmentid == nextScanConf->fragId); + } +#endif + fragptr.i = scanptr.p->fragPtrI; c_fragment_pool.getPtr(fragptr); switch (scanptr.p->scanState) { @@ -7572,6 +8236,8 @@ void Dblqh::continueAfterReceivingAllAiLab(Signal* signal) AccScanReq::setLockMode(req->requestInfo, scanptr.p->scanLockMode); AccScanReq::setReadCommittedFlag(req->requestInfo, scanptr.p->readCommitted); AccScanReq::setDescendingFlag(req->requestInfo, scanptr.p->descending); + AccScanReq::setNoDiskScanFlag(req->requestInfo, + !tcConnectptr.p->m_disk_table); req->transId1 = tcConnectptr.p->transid[0]; req->transId2 = tcConnectptr.p->transid[1]; req->savePointId = tcConnectptr.p->savePointId; @@ -8094,9 +8760,6 @@ void Dblqh::nextScanConfScanLab(Signal* signal) scanptr.p->m_curr_batch_size_rows, nextScanConf->accOperationPtr); jam(); - scanptr.p->scanLocalref[0] = nextScanConf->localKey[0]; - 
scanptr.p->scanLocalref[1] = nextScanConf->localKey[1]; - scanptr.p->scanLocalFragid = nextScanConf->fragId; nextScanConfLoopLab(signal); }//Dblqh::nextScanConfScanLab() @@ -8145,9 +8808,7 @@ Dblqh::next_scanconf_load_diskpage(Signal* signal, jam(); int res; - Uint32 local_key; - local_key = scanptr.p->scanLocalref[0] << MAX_TUPLES_BITS; - local_key += scanptr.p->scanLocalref[1]; + Uint32 local_key = scanPtr.p->m_row_id.ref(); if((res= c_tup->load_diskpage_scan(signal, regTcPtr.p->tupConnectrec, @@ -8214,13 +8875,13 @@ Dblqh::next_scanconf_tupkeyreq(Signal* signal, Uint32 reqinfo = (scanPtr.p->scanLockHold == ZFALSE); reqinfo = reqinfo + (regTcPtr->operation << 6); reqinfo = reqinfo + (regTcPtr->opExec << 10); - + TupKeyReq * const tupKeyReq = (TupKeyReq *)signal->getDataPtrSend(); tupKeyReq->connectPtr = regTcPtr->tupConnectrec; tupKeyReq->request = reqinfo; - tupKeyReq->keyRef1 = scanPtr.p->scanLocalref[0]; - tupKeyReq->keyRef2 = scanPtr.p->scanLocalref[1]; + tupKeyReq->keyRef1 = scanPtr.p->m_row_id.m_page_no; + tupKeyReq->keyRef2 = scanPtr.p->m_row_id.m_page_idx; tupKeyReq->attrBufLen = 0; tupKeyReq->opRef = scanPtr.p->scanApiOpPtr; tupKeyReq->applRef = scanPtr.p->scanApiBlockref; @@ -8260,9 +8921,9 @@ Uint32 Dblqh::readPrimaryKeys(ScanRecord *scanP, TcConnectionrec *tcConP, Uint32 *dst) { Uint32 tableId = tcConP->tableref; - Uint32 fragId = scanP->scanLocalFragid; - Uint32 fragPageId = scanP->scanLocalref[0]; - Uint32 pageIndex = scanP->scanLocalref[1]; + Uint32 fragId = tcConP->fragmentid; + Uint32 fragPageId = scanP->m_row_id.m_page_no; + Uint32 pageIndex = scanP->m_row_id.m_page_idx; if(scanP->rangeScan) { @@ -8636,9 +9297,7 @@ Uint32 Dblqh::initScanrec(const ScanFragReq* scanFragReq) scanptr.p->tupScan = tupScan; scanptr.p->scanState = ScanRecord::SCAN_FREE; scanptr.p->scanFlag = ZFALSE; - scanptr.p->scanLocalref[0] = 0; - scanptr.p->scanLocalref[1] = 0; - scanptr.p->scanLocalFragid = 0; + scanptr.p->m_row_id.setNull(); scanptr.p->scanTcWaiting = ZTRUE; 
scanptr.p->scanNumber = ~0; scanptr.p->scanApiOpPtr = scanFragReq->clientOpPtr; @@ -8741,8 +9400,8 @@ void Dblqh::initScanTc(const ScanFragReq* req, tcConnectptr.p->commitAckMarker = RNIL; tcConnectptr.p->m_offset_current_keybuf = 0; tcConnectptr.p->m_scan_curr_range_no = 0; - tcConnectptr.p->m_disk_table = tabptr.p->m_disk_table; - + tcConnectptr.p->m_dealloc = 0; + TablerecPtr tTablePtr; tTablePtr.i = tabptr.p->primaryTableId; ptrCheckGuard(tTablePtr, ctabrecFileSize, tablerec); @@ -9040,7 +9699,8 @@ void Dblqh::execCOPY_FRAGREQ(Signal* signal) const Uint32 copyPtr = copyFragReq->userPtr; const Uint32 userRef = copyFragReq->userRef; const Uint32 nodeId = copyFragReq->nodeId; - + const Uint32 gci = copyFragReq->gci; + ndbrequire(cnoActiveCopy < 3); ndbrequire(getFragmentrec(signal, fragId)); ndbrequire(fragptr.p->copyFragState == ZIDLE); @@ -9097,7 +9757,7 @@ void Dblqh::execCOPY_FRAGREQ(Signal* signal) scanptr.p->scanApiOpPtr = tcConnectptr.i; scanptr.p->scanApiBlockref = reference(); fragptr.p->m_scanNumberMask.clear(NR_ScanNo); - scanptr.p->scanBlockref = DBACC_REF; + scanptr.p->scanBlockref = DBTUP_REF; scanptr.p->scanLockHold = ZFALSE; initScanTc(0, @@ -9111,6 +9771,7 @@ void Dblqh::execCOPY_FRAGREQ(Signal* signal) tcConnectptr.p->copyCountWords = 0; tcConnectptr.p->tcOprec = tcConnectptr.i; tcConnectptr.p->schemaVersion = scanptr.p->scanSchemaVersion; + tcConnectptr.p->savePointId = gci; scanptr.p->scanState = ScanRecord::WAIT_ACC_COPY; AccScanReq * req = (AccScanReq*)&signal->theData[0]; req->senderData = scanptr.i; @@ -9120,10 +9781,13 @@ void Dblqh::execCOPY_FRAGREQ(Signal* signal) req->requestInfo = 0; AccScanReq::setLockMode(req->requestInfo, 0); AccScanReq::setReadCommittedFlag(req->requestInfo, 0); + AccScanReq::setNRScanFlag(req->requestInfo, gci ? 
1 : 0); + AccScanReq::setNoDiskScanFlag(req->requestInfo, 1); + req->transId1 = tcConnectptr.p->transid[0]; req->transId2 = tcConnectptr.p->transid[1]; req->savePointId = tcConnectptr.p->savePointId; - sendSignal(tcConnectptr.p->tcAccBlockref, GSN_ACC_SCANREQ, signal, + sendSignal(scanptr.p->scanBlockref, GSN_ACC_SCANREQ, signal, AccScanReq::SignalLength, JBB); return; }//Dblqh::execCOPY_FRAGREQ() @@ -9152,7 +9816,7 @@ void Dblqh::accScanConfCopyLab(Signal* signal) signal->theData[2] = scanptr.p->scanSchemaVersion; signal->theData[3] = ZSTORED_PROC_COPY; // theData[4] is not used in TUP with ZSTORED_PROC_COPY - sendSignal(tcConnectptr.p->tcTupBlockref, GSN_STORED_PROCREQ, signal, 5, JBB); + sendSignal(scanptr.p->scanBlockref, GSN_STORED_PROCREQ, signal, 5, JBB); return; }//Dblqh::accScanConfCopyLab() @@ -9211,12 +9875,25 @@ void Dblqh::storedProcConfCopyLab(Signal* signal) void Dblqh::continueFirstCopyAfterBlockedLab(Signal* signal) { + /** + * Start sending ROWID for all operations from now on + */ + fragptr.p->m_copy_started_state = Fragrecord::AC_NR_COPY; + + if (0) + { + ndbout_c("STOPPING COPY (%d -> %d %d %d)", + scanptr.p->scanBlockref, + scanptr.p->scanAccPtr, RNIL, NextScanReq::ZSCAN_NEXT); + return; + } + scanptr.i = tcConnectptr.p->tcScanRec; c_scanRecordPool.getPtr(scanptr); signal->theData[0] = scanptr.p->scanAccPtr; signal->theData[1] = RNIL; signal->theData[2] = NextScanReq::ZSCAN_NEXT; - sendSignal(tcConnectptr.p->tcAccBlockref, GSN_NEXT_SCANREQ, signal, 3, JBB); + sendSignal(scanptr.p->scanBlockref, GSN_NEXT_SCANREQ, signal, 3, JBB); return; }//Dblqh::continueFirstCopyAfterBlockedLab() @@ -9255,33 +9932,99 @@ void Dblqh::nextScanConfCopyLab(Signal* signal) // completion. Signal completion through scanCompletedStatus-flag. 
/*---------------------------------------------------------------------------*/ scanptr.p->scanCompletedStatus = ZTRUE; + scanptr.p->scanState = ScanRecord::WAIT_LQHKEY_COPY; + if (ERROR_INSERTED(5042)) + { + CLEAR_ERROR_INSERT_VALUE; + tcConnectptr.p->copyCountWords = ~0; + signal->theData[0] = 9999; + sendSignal(numberToRef(CMVMI, scanptr.p->scanNodeId), + GSN_NDB_TAMPER, signal, 1, JBA); + } return; }//if - // If accOperationPtr == RNIL no record was returned by ACC - if (nextScanConf->accOperationPtr == RNIL) { + TcConnectionrec * tcConP = tcConnectptr.p; + + tcConP->m_use_rowid = true; + tcConP->m_row_id = scanptr.p->m_row_id; + + if (signal->getLength() == 7) + { jam(); - signal->theData[0] = scanptr.p->scanAccPtr; - signal->theData[1] = AccCheckScan::ZCHECK_LCP_STOP; - sendSignal(tcConnectptr.p->tcAccBlockref, GSN_ACC_CHECK_SCAN, signal, 2, JBB); - return; - } + ndbrequire(nextScanConf->accOperationPtr == RNIL); + initCopyTc(signal, ZDELETE); + set_acc_ptr_in_scan_record(scanptr.p, 0, RNIL); + tcConP->gci = nextScanConf->gci; - set_acc_ptr_in_scan_record(scanptr.p, 0, nextScanConf->accOperationPtr); - initCopyTc(signal); + tcConP->primKeyLen = 0; + tcConP->totSendlenAi = 0; + tcConP->connectState = TcConnectionrec::COPY_CONNECTED; - Fragrecord* fragPtrP= fragptr.p; - scanptr.p->scanState = ScanRecord::WAIT_TUPKEY_COPY; - tcConnectptr.p->transactionState = TcConnectionrec::COPY_TUPKEY; - if(tcConnectptr.p->m_disk_table) - { - next_scanconf_load_diskpage(signal, scanptr, tcConnectptr,fragPtrP); +/*---------------------------------------------------------------------------*/ +// To avoid using up to many operation records in ACC we will increase the +// constant to ensure that we never send more than 40 records at a time. +// This is where the constant 56 comes from. For long records this constant +// will not matter that much. The current maximum is 6000 words outstanding +// (including a number of those 56 words not really sent). 
We also have to +// ensure that there are never more simultaneous usage of these operation +// records to ensure that node recovery does not fail because of simultaneous +// scanning. +/*---------------------------------------------------------------------------*/ + UintR TnoOfWords = 8; + TnoOfWords = TnoOfWords + MAGIC_CONSTANT; + TnoOfWords = TnoOfWords + (TnoOfWords >> 2); + + /*----------------------------------------------------------------- + * NOTE for transid1! + * Transid1 in the tcConnection record is used load regulate the + * copy(node recovery) process. + * The number of outstanding words are written in the transid1 + * variable. This will be sent to the starting node in the + * LQHKEYREQ signal and when the answer is returned in the LQHKEYCONF + * we can reduce the number of outstanding words and check to see + * if more LQHKEYREQ signals should be sent. + * + * However efficient this method is rather unsafe in such way that + * it overwrites the transid1 original data. + * + * Also see TR 587. + *----------------------------------------------------------------*/ + tcConP->transid[0] = TnoOfWords; // Data overload, see note! 
+ packLqhkeyreqLab(signal); + tcConP->copyCountWords += TnoOfWords; + scanptr.p->scanState = ScanRecord::WAIT_LQHKEY_COPY; + if (tcConP->copyCountWords < cmaxWordsAtNodeRec) { + nextRecordCopy(signal); + } + return; } else { - next_scanconf_tupkeyreq(signal, scanptr, tcConnectptr.p, fragPtrP, RNIL); + // If accOperationPtr == RNIL no record was returned by ACC + if (nextScanConf->accOperationPtr == RNIL) { + jam(); + signal->theData[0] = scanptr.p->scanAccPtr; + signal->theData[1] = AccCheckScan::ZCHECK_LCP_STOP; + sendSignal(scanptr.p->scanBlockref, GSN_ACC_CHECK_SCAN, signal, 2, JBB); + return; + } + + initCopyTc(signal, ZINSERT); + set_acc_ptr_in_scan_record(scanptr.p, 0, nextScanConf->accOperationPtr); + + Fragrecord* fragPtrP= fragptr.p; + scanptr.p->scanState = ScanRecord::WAIT_TUPKEY_COPY; + tcConP->transactionState = TcConnectionrec::COPY_TUPKEY; + if(tcConP->m_disk_table) + { + next_scanconf_load_diskpage(signal, scanptr, tcConnectptr,fragPtrP); + } + else + { + next_scanconf_tupkeyreq(signal, scanptr, tcConP, fragPtrP, RNIL); + } } - return; }//Dblqh::nextScanConfCopyLab() @@ -9357,6 +10100,7 @@ void Dblqh::copyTupkeyConfLab(Signal* signal) Uint32* tmp = signal->getDataPtrSend()+24; Uint32 len= tcConnectptr.p->primKeyLen = readPrimaryKeys(scanP, tcConP, tmp); + tcConP->gci = tmp[len]; // Calculate hash (no need to linearies key) if (g_key_descriptor_pool.getPtr(tableId)->hasCharAttr) { @@ -9523,10 +10267,23 @@ void Dblqh::continueCopyAfterBlockedLab(Signal* signal) c_scanRecordPool.getPtr(scanptr); tcConnectptr.p->errorCode = 0; Uint32 acc_op_ptr= get_acc_ptr_from_scan_record(scanptr.p, 0, false); - signal->theData[0] = scanptr.p->scanAccPtr; - signal->theData[1] = acc_op_ptr; - signal->theData[2] = NextScanReq::ZSCAN_NEXT_COMMIT; - sendSignal(tcConnectptr.p->tcAccBlockref, GSN_NEXT_SCANREQ, signal, 3, JBB); + if (acc_op_ptr != RNIL) + { + signal->theData[0] = scanptr.p->scanAccPtr; + signal->theData[1] = acc_op_ptr; + signal->theData[2] = 
NextScanReq::ZSCAN_NEXT_COMMIT; + sendSignal(scanptr.p->scanBlockref, GSN_NEXT_SCANREQ, signal, 3, JBB); + } + else + { + /** + * No need to commit (unlock) + */ + signal->theData[0] = scanptr.p->scanAccPtr; + signal->theData[1] = RNIL; + signal->theData[2] = NextScanReq::ZSCAN_NEXT; + sendSignal(scanptr.p->scanBlockref, GSN_NEXT_SCANREQ, signal, 3, JBB); + } return; }//Dblqh::continueCopyAfterBlockedLab() @@ -9557,6 +10314,12 @@ void Dblqh::closeCopyLab(Signal* signal) tcConnectptr.p->transid[1] = 0; fragptr.i = tcConnectptr.p->fragmentptr; c_fragment_pool.getPtr(fragptr); + + /** + * Stop sending ROWID for all operations from now on + */ + fragptr.p->m_copy_started_state = Fragrecord::AC_NORMAL; + scanptr.i = tcConnectptr.p->tcScanRec; c_scanRecordPool.getPtr(scanptr); scanptr.p->scanState = ScanRecord::WAIT_CLOSE_COPY; @@ -9596,8 +10359,8 @@ void Dblqh::continueCloseCopyAfterBlockedLab(Signal* signal) c_scanRecordPool.getPtr(scanptr); signal->theData[0] = scanptr.p->scanAccPtr; signal->theData[1] = RNIL; - signal->theData[2] = ZCOPY_CLOSE; - sendSignal(tcConnectptr.p->tcAccBlockref, GSN_NEXT_SCANREQ, signal, 3, JBB); + signal->theData[2] = NextScanReq::ZSCAN_CLOSE; + sendSignal(scanptr.p->scanBlockref, GSN_NEXT_SCANREQ, signal, 3, JBB); return; }//Dblqh::continueCloseCopyAfterBlockedLab() @@ -9682,7 +10445,7 @@ void Dblqh::tupCopyCloseConfLab(Signal* signal) conf->tableId = tcConnectptr.p->tableref; conf->fragId = tcConnectptr.p->fragmentid; sendSignal(tcConnectptr.p->clientBlockref, GSN_COPY_FRAGCONF, signal, - CopyFragConf::SignalLength, JBB); + CopyFragConf::SignalLength, JBB); }//if }//if releaseActiveCopy(signal); @@ -9701,6 +10464,7 @@ void Dblqh::tupCopyCloseConfLab(Signal* signal) void Dblqh::closeCopyRequestLab(Signal* signal) { scanptr.p->scanErrorCounter++; + ndbout_c("closeCopyRequestLab: scanState: %d", scanptr.p->scanState); switch (scanptr.p->scanState) { case ScanRecord::WAIT_TUPKEY_COPY: case ScanRecord::WAIT_NEXT_SCAN_COPY: @@ -9784,7 +10548,13 
@@ void Dblqh::execCOPY_ACTIVEREQ(Signal* signal) }//if return; }//if + fragptr.p->fragStatus = Fragrecord::FSACTIVE; + if (TRACENR_FLAG) + TRACENR("tab: " << tabptr.i + << " frag: " << fragId + << " COPY ACTIVE" << endl); + if (fragptr.p->lcpFlag == Fragrecord::LCP_STATE_TRUE) { jam(); fragptr.p->logFlag = Fragrecord::STATE_TRUE; @@ -9930,21 +10700,18 @@ void Dblqh::execCOPY_STATEREQ(Signal* signal) /* */ /* SUBROUTINE SHORT NAME = ICT */ /* ========================================================================= */ -void Dblqh::initCopyTc(Signal* signal) +void Dblqh::initCopyTc(Signal* signal, Operation_t op) { - const NextScanConf * const nextScanConf = (NextScanConf *)&signal->theData[0]; - scanptr.p->scanLocalref[0] = nextScanConf->localKey[0]; - scanptr.p->scanLocalref[1] = nextScanConf->localKey[1]; - scanptr.p->scanLocalFragid = nextScanConf->fragId; tcConnectptr.p->operation = ZREAD; tcConnectptr.p->apiVersionNo = 0; tcConnectptr.p->opExec = 0; /* NOT INTERPRETED MODE */ tcConnectptr.p->schemaVersion = scanptr.p->scanSchemaVersion; Uint32 reqinfo = 0; - LqhKeyReq::setLockType(reqinfo, ZINSERT); LqhKeyReq::setDirtyFlag(reqinfo, 1); LqhKeyReq::setSimpleFlag(reqinfo, 1); - LqhKeyReq::setOperation(reqinfo, ZWRITE); + LqhKeyReq::setOperation(reqinfo, op); + LqhKeyReq::setGCIFlag(reqinfo, 1); + LqhKeyReq::setNrCopyFlag(reqinfo, 1); /* AILen in LQHKEYREQ IS ZERO */ tcConnectptr.p->reqinfo = reqinfo; /* ------------------------------------------------------------------------ */ @@ -10264,7 +11031,6 @@ void Dblqh::execBACKUP_FRAGMENT_CONF(Signal* signal) * ----------------------------------------------------------------------- */ fragptr.i = lcpPtr.p->currentFragment.fragPtrI; c_fragment_pool.getPtr(fragptr); - fragptr.p->fragActiveStatus = ZFALSE; contChkpNextFragLab(signal); return; @@ -10718,6 +11484,29 @@ void Dblqh::execGCP_SAVEREQ(Signal* signal) const Uint32 dihBlockRef = saveReq->dihBlockRef; const Uint32 dihPtr = saveReq->dihPtr; const Uint32 gci = 
saveReq->gci; + + if(getNodeState().startLevel >= NodeState::SL_STOPPING_4){ + GCPSaveRef * const saveRef = (GCPSaveRef*)&signal->theData[0]; + saveRef->dihPtr = dihPtr; + saveRef->nodeId = getOwnNodeId(); + saveRef->gci = gci; + saveRef->errorCode = GCPSaveRef::NodeShutdownInProgress; + sendSignal(dihBlockRef, GSN_GCP_SAVEREF, signal, + GCPSaveRef::SignalLength, JBB); + return; + } + + if (getNodeState().getNodeRestartInProgress()) + { + GCPSaveRef * const saveRef = (GCPSaveRef*)&signal->theData[0]; + saveRef->dihPtr = dihPtr; + saveRef->nodeId = getOwnNodeId(); + saveRef->gci = gci; + saveRef->errorCode = GCPSaveRef::NodeRestartInProgress; + sendSignal(dihBlockRef, GSN_GCP_SAVEREF, signal, + GCPSaveRef::SignalLength, JBB); + return; + } ndbrequire(gci >= cnewestCompletedGci); @@ -10754,30 +11543,7 @@ void Dblqh::execGCP_SAVEREQ(Signal* signal) }//if ndbrequire(ccurrentGcprec == RNIL); - - - if(getNodeState().startLevel >= NodeState::SL_STOPPING_4){ - GCPSaveRef * const saveRef = (GCPSaveRef*)&signal->theData[0]; - saveRef->dihPtr = dihPtr; - saveRef->nodeId = getOwnNodeId(); - saveRef->gci = gci; - saveRef->errorCode = GCPSaveRef::NodeShutdownInProgress; - sendSignal(dihBlockRef, GSN_GCP_SAVEREF, signal, - GCPSaveRef::SignalLength, JBB); - return; - } - - if(getNodeState().getNodeRestartInProgress()){ - GCPSaveRef * const saveRef = (GCPSaveRef*)&signal->theData[0]; - saveRef->dihPtr = dihPtr; - saveRef->nodeId = getOwnNodeId(); - saveRef->gci = gci; - saveRef->errorCode = GCPSaveRef::NodeRestartInProgress; - sendSignal(dihBlockRef, GSN_GCP_SAVEREF, signal, - GCPSaveRef::SignalLength, JBB); - return; - } - + ccurrentGcprec = 0; gcpPtr.i = ccurrentGcprec; ptrCheckGuard(gcpPtr, cgcprecFileSize, gcpRecord); @@ -11696,7 +12462,7 @@ void Dblqh::lastWriteInFileLab(Signal* signal) /* WE WILL CLOSE THE FILE. 
*/ /*---------------------------------------------------------------------------*/ logFilePtr.p->logFileStatus = LogFileRecord::CLOSING_WRITE_LOG; - closeFile(signal, logFilePtr); + closeFile(signal, logFilePtr, __LINE__); }//if }//if }//if @@ -11866,7 +12632,7 @@ void Dblqh::writeInitMbyteLab(Signal* signal) jam(); releaseLogpage(signal); logFilePtr.p->logFileStatus = LogFileRecord::CLOSING_INIT; - closeFile(signal, logFilePtr); + closeFile(signal, logFilePtr, __LINE__); return; }//if writeInitMbyte(signal); @@ -12424,7 +13190,7 @@ void Dblqh::readSrFrontpageLab(Signal* signal) * ------------------------------------------------------------------------ */ releaseLogpage(signal); logFilePtr.p->logFileStatus = LogFileRecord::CLOSING_SR; - closeFile(signal, logFilePtr); + closeFile(signal, logFilePtr, __LINE__); LogFileRecordPtr locLogFilePtr; findLogfile(signal, fileNo, logPartPtr, &locLogFilePtr); locLogFilePtr.p->logFileStatus = LogFileRecord::OPEN_SR_LAST_FILE; @@ -12495,7 +13261,7 @@ void Dblqh::readSrLastMbyteLab(Signal* signal) }//if }//if logFilePtr.p->logFileStatus = LogFileRecord::CLOSING_SR; - closeFile(signal, logFilePtr); + closeFile(signal, logFilePtr, __LINE__); if (logPartPtr.p->noLogFiles > ZMAX_LOG_FILES_IN_PAGE_ZERO) { Uint32 fileNo; if (logFilePtr.p->fileNo >= ZMAX_LOG_FILES_IN_PAGE_ZERO) { @@ -12553,7 +13319,7 @@ void Dblqh::readSrNextFileLab(Signal* signal) }//if releaseLogpage(signal); logFilePtr.p->logFileStatus = LogFileRecord::CLOSING_SR; - closeFile(signal, logFilePtr); + closeFile(signal, logFilePtr, __LINE__); if (logPartPtr.p->srRemainingFiles > ZMAX_LOG_FILES_IN_PAGE_ZERO) { Uint32 fileNo; if (logFilePtr.p->fileNo >= ZMAX_LOG_FILES_IN_PAGE_ZERO) { @@ -12862,13 +13628,6 @@ void Dblqh::execSTART_RECREQ(Signal* signal) * WE ALSO NEED TO SET CNEWEST_GCI TO ENSURE THAT LOG RECORDS ARE EXECUTED * WITH A PROPER GCI. 
*------------------------------------------------------------------------ */ - if (cstartType == NodeState::ST_NODE_RESTART) { - jam(); - signal->theData[0] = ZSR_PHASE3_START; - signal->theData[1] = ZSR_PHASE2_COMPLETED; - sendSignal(cownref, GSN_CONTINUEB, signal, 2, JBB); - return; - }//if if(cstartType == NodeState::ST_INITIAL_NODE_RESTART){ jam(); StartRecConf * conf = (StartRecConf*)signal->getDataPtrSend(); @@ -13016,16 +13775,9 @@ void Dblqh::execSTART_EXEC_SR(Signal* signal) * WE NEED TO SEND THOSE SIGNALS EVEN IF WE HAVE NOT REQUESTED * ANY FRAGMENTS PARTICIPATE IN THIS PHASE. * --------------------------------------------------------------------- */ - for (Uint32 i = 0; i < cnoOfNodes; i++) { - jam(); - if (cnodeStatus[i] == ZNODE_UP) { - jam(); - ndbrequire(cnodeData[i] < MAX_NDB_NODES); - BlockReference ref = calcLqhBlockRef(cnodeData[i]); - signal->theData[0] = cownNodeid; - sendSignal(ref, GSN_EXEC_SRREQ, signal, 1, JBB); - }//if - }//for + NodeReceiverGroup rg(DBLQH, m_sr_nodes); + signal->theData[0] = cownNodeid; + sendSignal(rg, GSN_EXEC_SRREQ, signal, 1, JBB); return; } else { jam(); @@ -13066,8 +13818,15 @@ void Dblqh::execSTART_EXEC_SR(Signal* signal) c_lcp_complete_fragments.remove(fragptr); c_redo_complete_fragments.add(fragptr); - fragptr.p->fragStatus = Fragrecord::FSACTIVE; - fragptr.p->logFlag = Fragrecord::STATE_TRUE; + if (!getNodeState().getNodeRestartInProgress()) + { + fragptr.p->logFlag = Fragrecord::STATE_TRUE; + fragptr.p->fragStatus = Fragrecord::FSACTIVE; + } + else + { + fragptr.p->fragStatus = Fragrecord::ACTIVE_CREATION; + } signal->theData[0] = fragptr.p->srUserptr; signal->theData[1] = cownNodeid; sendSignal(fragptr.p->srBlockref, GSN_START_FRAGCONF, signal, 2, JBB); @@ -13165,32 +13924,22 @@ void Dblqh::execEXEC_SRCONF(Signal* signal) jamEntry(); Uint32 nodeId = signal->theData[0]; arrGuard(nodeId, MAX_NDB_NODES); - cnodeExecSrState[nodeId] = ZEXEC_SR_COMPLETED; - ndbrequire(cnoOfNodes < MAX_NDB_NODES); - for (Uint32 i = 
0; i < cnoOfNodes; i++) { + m_sr_exec_sr_conf.set(nodeId); + if (!m_sr_nodes.equal(m_sr_exec_sr_conf)) + { jam(); - if (cnodeStatus[i] == ZNODE_UP) { - jam(); - nodeId = cnodeData[i]; - arrGuard(nodeId, MAX_NDB_NODES); - if (cnodeExecSrState[nodeId] != ZEXEC_SR_COMPLETED) { - jam(); - /* ------------------------------------------------------------------ - * ALL NODES HAVE NOT REPORTED COMPLETION OF EXECUTING FRAGMENT - * LOGS YET. - * ----------------------------------------------------------------- */ - return; - }//if - }//if - }//for - + /* ------------------------------------------------------------------ + * ALL NODES HAVE NOT REPORTED COMPLETION OF EXECUTING FRAGMENT + * LOGS YET. + * ----------------------------------------------------------------- */ + return; + } + /* ------------------------------------------------------------------------ * CLEAR NODE SYSTEM RESTART EXECUTION STATE TO PREPARE FOR NEXT PHASE OF * LOG EXECUTION. * ----------------------------------------------------------------------- */ - for (nodeId = 0; nodeId < MAX_NDB_NODES; nodeId++) { - cnodeExecSrState[nodeId] = ZSTART_SR; - }//for + m_sr_exec_sr_conf.clear(); /* ------------------------------------------------------------------------ * NOW CHECK IF ALL FRAGMENTS IN THIS PHASE HAVE COMPLETED. IF SO START THE @@ -13274,29 +14023,19 @@ void Dblqh::execEXEC_SRREQ(Signal* signal) jamEntry(); Uint32 nodeId = signal->theData[0]; ndbrequire(nodeId < MAX_NDB_NODES); - cnodeSrState[nodeId] = ZEXEC_SR_COMPLETED; - ndbrequire(cnoOfNodes < MAX_NDB_NODES); - for (Uint32 i = 0; i < cnoOfNodes; i++) { + m_sr_exec_sr_req.set(nodeId); + if (!m_sr_exec_sr_req.equal(m_sr_nodes)) + { jam(); - if (cnodeStatus[i] == ZNODE_UP) { - jam(); - nodeId = cnodeData[i]; - if (cnodeSrState[nodeId] != ZEXEC_SR_COMPLETED) { - jam(); - /* ------------------------------------------------------------------ - * ALL NODES HAVE NOT REPORTED COMPLETION OF SENDING EXEC_FRAGREQ YET. 
- * ----------------------------------------------------------------- */ - return; - }//if - }//if - }//for + return; + } + /* ------------------------------------------------------------------------ * CLEAR NODE SYSTEM RESTART STATE TO PREPARE FOR NEXT PHASE OF LOG * EXECUTION * ----------------------------------------------------------------------- */ - for (nodeId = 0; nodeId < MAX_NDB_NODES; nodeId++) { - cnodeSrState[nodeId] = ZSTART_SR; - }//for + m_sr_exec_sr_req.clear(); + if (csrPhasesCompleted != 0) { /* ---------------------------------------------------------------------- * THE FIRST PHASE MUST ALWAYS EXECUTE THE LOG. @@ -13304,7 +14043,7 @@ void Dblqh::execEXEC_SRREQ(Signal* signal) if (cnoFragmentsExecSr == 0) { jam(); /* -------------------------------------------------------------------- - * THERE WERE NO FRAGMENTS THAT NEEDED TO EXECUTE THE LOG IN THIS PHASE. + * THERE WERE NO FRAGMENTS THAT NEEDED TO EXECUTE THE LOG IN THIS PHASE. * ------------------------------------------------------------------- */ srPhase3Comp(signal); return; @@ -13348,11 +14087,6 @@ void Dblqh::srPhase3Start(Signal* signal) if (csrPhaseStarted == ZSR_NO_PHASE_STARTED) { jam(); csrPhaseStarted = tsrPhaseStarted; - if (cstartType == NodeState::ST_NODE_RESTART) { - ndbrequire(cinitialStartOngoing == ZTRUE); - cinitialStartOngoing = ZFALSE; - checkStartCompletedLab(signal); - }//if return; }//if ndbrequire(csrPhaseStarted != tsrPhaseStarted); @@ -13375,22 +14109,12 @@ void Dblqh::srPhase3Start(Signal* signal) logPartPtr.p->logLastGci = 2; }//if }//for - if (cstartType == NodeState::ST_NODE_RESTART) { - jam(); - /* ---------------------------------------------------------------------- - * FOR A NODE RESTART WE HAVE NO FRAGMENTS DEFINED YET. 
- * THUS WE CAN SKIP THAT PART - * --------------------------------------------------------------------- */ - signal->theData[0] = ZSR_GCI_LIMITS; - signal->theData[1] = RNIL; - sendSignal(cownref, GSN_CONTINUEB, signal, 2, JBB); - } else { - jam(); - c_lcp_complete_fragments.first(fragptr); - signal->theData[0] = ZSR_GCI_LIMITS; - signal->theData[1] = fragptr.i; - sendSignal(cownref, GSN_CONTINUEB, signal, 2, JBB); - }//if + + jam(); + c_lcp_complete_fragments.first(fragptr); + signal->theData[0] = ZSR_GCI_LIMITS; + signal->theData[1] = fragptr.i; + sendSignal(cownref, GSN_CONTINUEB, signal, 2, JBB); return; }//Dblqh::srPhase3Start() @@ -13750,7 +14474,7 @@ void Dblqh::execSr(Signal* signal) jam(); releaseMmPages(signal); logFilePtr.p->logFileStatus = LogFileRecord::CLOSING_EXEC_SR_COMPLETED; - closeFile(signal, logFilePtr); + closeFile(signal, logFilePtr, __LINE__); return; break; case LogPartRecord::LES_EXEC_LOG_NEW_MBYTE: @@ -13766,7 +14490,7 @@ void Dblqh::execSr(Signal* signal) ptrCheckGuard(nextLogFilePtr, clogFileFileSize, logFileRecord); nextLogFilePtr.p->currentMbyte = 0; logFilePtr.p->logFileStatus = LogFileRecord::CLOSING_EXEC_SR; - closeFile(signal, logFilePtr); + closeFile(signal, logFilePtr, __LINE__); return; break; case LogPartRecord::LES_EXEC_LOG: @@ -14101,6 +14825,8 @@ void Dblqh::execLogRecord(Signal* signal) ptrCheckGuard(tcConnectptr, ctcConnectrecFileSize, tcConnectionrec); fragptr.i = tcConnectptr.p->fragmentptr; c_fragment_pool.getPtr(fragptr); + tcConnectptr.p->m_log_part_ptr_i = fragptr.p->m_log_part_ptr_i; + // Read a log record and prepare it for execution readLogHeader(signal); readKey(signal); @@ -14217,7 +14943,7 @@ void Dblqh::exitFromInvalidate(Signal* signal) { if (logFilePtr.i != nextAfterCurrentLogFilePtr.i) { // This file should be closed. 
logFilePtr.p->logFileStatus = LogFileRecord::CLOSE_SR_INVALIDATE_PAGES; - closeFile(signal, logFilePtr); + closeFile(signal, logFilePtr, __LINE__); // Return from this function and wait for close confirm. Then come back // and test the previous file for closing. return; @@ -14274,17 +15000,19 @@ void Dblqh::logLqhkeyrefLab(Signal* signal) case ZUPDATE: case ZDELETE: jam(); - ndbrequire(terrorCode == ZNO_TUPLE_FOUND); + if (unlikely(terrorCode != ZNO_TUPLE_FOUND)) + goto error; break; case ZINSERT: jam(); - ndbrequire(terrorCode == ZTUPLE_ALREADY_EXIST); + if (unlikely(terrorCode != ZTUPLE_ALREADY_EXIST && terrorCode != 899)) + goto error; + break; default: - ndbrequire(false); - return; - break; - }//switch + goto error; + } + if (result == ZOK) { jam(); execLogRecord(signal); @@ -14303,6 +15031,19 @@ void Dblqh::logLqhkeyrefLab(Signal* signal) * PROCEEDING IN RARE CASES. * ----------------------------------------------------------------------- */ return; +error: + BaseString tmp; + tmp.appfmt("You have found a bug!" + " Failed op (%s) during REDO table: %d fragment: %d err: %d", + tcConnectptr.p->operation == ZINSERT ? "INSERT" : + tcConnectptr.p->operation == ZUPDATE ? "UPDATE" : + tcConnectptr.p->operation == ZDELETE ? "DELETE" : + tcConnectptr.p->operation == ZWRITE ? "WRITE" : "<unknown>", + tcConnectptr.p->tableref, + tcConnectptr.p->fragmentid, + terrorCode); + progError(__LINE__, NDBD_EXIT_SYSTEM_ERROR, + tmp.c_str()); }//Dblqh::logLqhkeyrefLab() void Dblqh::closeExecSrCompletedLab(Signal* signal) @@ -14350,24 +15091,11 @@ void Dblqh::execLogComp(Signal* signal) * ALL LOG PARTS HAVE COMPLETED THE EXECUTION OF THE LOG. WE CAN NOW START * SENDING THE EXEC_FRAGCONF SIGNALS TO ALL INVOLVED FRAGMENTS. 
* ----------------------------------------------------------------------- */ - if (cstartType != NodeState::ST_NODE_RESTART) { - jam(); - c_lcp_complete_fragments.first(fragptr); - signal->theData[0] = ZSEND_EXEC_CONF; - signal->theData[1] = fragptr.i; - sendSignal(cownref, GSN_CONTINUEB, signal, 2, JBB); - } else { - jam(); - /* ---------------------------------------------------------------------- - * FOR NODE RESTART WE CAN SKIP A NUMBER OF STEPS SINCE WE HAVE NO - * FRAGMENTS DEFINED AT THIS POINT. OBVIOUSLY WE WILL NOT NEED TO - * EXECUTE ANY MORE LOG STEPS EITHER AND THUS WE CAN IMMEDIATELY - * START FINDING THE END AND THE START OF THE LOG. - * --------------------------------------------------------------------- */ - csrPhasesCompleted = 3; - execSrCompletedLab(signal); - return; - }//if + jam(); + c_lcp_complete_fragments.first(fragptr); + signal->theData[0] = ZSEND_EXEC_CONF; + signal->theData[1] = fragptr.i; + sendSignal(cownref, GSN_CONTINUEB, signal, 2, JBB); return; }//Dblqh::execLogComp() @@ -14756,7 +15484,7 @@ void Dblqh::aiStateErrorCheckLab(Signal* signal, Uint32* dataPtr, Uint32 length) /* YET. THIS IS POSSIBLE IF ACTIVE CREATION OF THE FRAGMENT IS */ /* ONGOING. 
*/ /*************************************************************************>*/ - if (tcConnectptr.p->activeCreat == ZTRUE) { + if (tcConnectptr.p->activeCreat == Fragrecord::AC_IGNORED) { jam(); /*************************************************************************>*/ /* ONGOING ABORTS DURING ACTIVE CREATION MUST SAVE THE ATTRIBUTE INFO*/ @@ -14801,7 +15529,7 @@ void Dblqh::aiStateErrorCheckLab(Signal* signal, Uint32* dataPtr, Uint32 length) /* COMPLETED AND THAT THE ERROR CODE IS PROPERLY SET */ /*************************************************************************>*/ tcConnectptr.p->errorCode = terrorCode; - tcConnectptr.p->activeCreat = ZFALSE; + tcConnectptr.p->activeCreat = Fragrecord::AC_NORMAL; if (tcConnectptr.p->transactionState == TcConnectionrec::WAIT_AI_AFTER_ABORT) { jam(); @@ -15073,13 +15801,15 @@ void Dblqh::checkSrCompleted(Signal* signal) /* ------ CLOSE A FILE DURING EXECUTION OF FRAGMENT LOG ------- */ /* */ /* ------------------------------------------------------------------------- */ -void Dblqh::closeFile(Signal* signal, LogFileRecordPtr clfLogFilePtr) +void Dblqh::closeFile(Signal* signal, + LogFileRecordPtr clfLogFilePtr, Uint32 line) { signal->theData[0] = clfLogFilePtr.p->fileRef; signal->theData[1] = cownref; signal->theData[2] = clfLogFilePtr.i; signal->theData[3] = ZCLOSE_NO_DELETE; - sendSignal(NDBFS_REF, GSN_FSCLOSEREQ, signal, 4, JBA); + signal->theData[4] = line; + sendSignal(NDBFS_REF, GSN_FSCLOSEREQ, signal, 5, JBA); }//Dblqh::closeFile() @@ -15411,7 +16141,6 @@ void Dblqh::initialiseFragrec(Signal* signal) refresh_watch_dog(); new (fragptr.p) Fragrecord(); fragptr.p->fragStatus = Fragrecord::FREE; - fragptr.p->fragActiveStatus = ZFALSE; fragptr.p->execSrStatus = Fragrecord::IDLE; fragptr.p->srStatus = Fragrecord::SS_IDLE; } @@ -15570,10 +16299,9 @@ void Dblqh::initialiseRecordsLab(Signal* signal, Uint32 data, switch (data) { case 0: jam(); - for (i = 0; i < MAX_NDB_NODES; i++) { - cnodeSrState[i] = ZSTART_SR; - 
cnodeExecSrState[i] = ZSTART_SR; - }//for + m_sr_nodes.clear(); + m_sr_exec_sr_req.clear(); + m_sr_exec_sr_conf.clear(); for (i = 0; i < 1024; i++) { ctransidHash[i] = RNIL; }//for @@ -15581,16 +16309,11 @@ void Dblqh::initialiseRecordsLab(Signal* signal, Uint32 data, cactiveCopy[i] = RNIL; }//for cnoActiveCopy = 0; - cCounterAccCommitBlocked = 0; - cCounterTupCommitBlocked = 0; - caccCommitBlocked = false; - ctupCommitBlocked = false; - cCommitBlocked = false; ccurrentGcprec = RNIL; caddNodeState = ZFALSE; cstartRecReq = ZFALSE; - cnewestGci = (UintR)-1; - cnewestCompletedGci = (UintR)-1; + cnewestGci = ~0; + cnewestCompletedGci = ~0; crestartOldestGci = 0; crestartNewestGci = 0; csrPhaseStarted = ZSR_NO_PHASE_STARTED; @@ -15915,7 +16638,7 @@ void Dblqh::initLogpart(Signal* signal) * ========================================================================= */ void Dblqh::initLogPointers(Signal* signal) { - logPartPtr.i = tcConnectptr.p->hashValue & 3; + logPartPtr.i = tcConnectptr.p->m_log_part_ptr_i; ptrCheckGuard(logPartPtr, clogPartFileSize, logPartRecord); logFilePtr.i = logPartPtr.p->currentLogfile; ptrCheckGuard(logFilePtr, clogFileFileSize, logFileRecord); @@ -15947,10 +16670,10 @@ void Dblqh::initReqinfoExecSr(Signal* signal) /* SET SIMPLE TRANSACTION */ /* ------------------------------------------------------------------------- */ LqhKeyReq::setSimpleFlag(Treqinfo, 1); + LqhKeyReq::setGCIFlag(Treqinfo, 1); /* ------------------------------------------------------------------------- */ /* SET OPERATION TYPE AND LOCK MODE (NEVER READ OPERATION OR SCAN IN LOG) */ /* ------------------------------------------------------------------------- */ - LqhKeyReq::setLockType(Treqinfo, regTcPtr->operation); LqhKeyReq::setOperation(Treqinfo, regTcPtr->operation); regTcPtr->reqinfo = Treqinfo; /* ------------------------------------------------------------------------ */ @@ -16437,6 +17160,8 @@ void Dblqh::readLogHeader(Signal* signal) tcConnectptr.p->operation = 
logPagePtr.p->logPageWord[logPos + 3]; tcConnectptr.p->totSendlenAi = logPagePtr.p->logPageWord[logPos + 4]; tcConnectptr.p->primKeyLen = logPagePtr.p->logPageWord[logPos + 5]; + tcConnectptr.p->m_row_id.m_page_no = logPagePtr.p->logPageWord[logPos + 6]; + tcConnectptr.p->m_row_id.m_page_idx = logPagePtr.p->logPageWord[logPos+ 7]; logPagePtr.p->logPageWord[ZCURR_PAGE_INDEX] = logPos + ZLOG_HEAD_SIZE; } else { jam(); @@ -16446,7 +17171,11 @@ void Dblqh::readLogHeader(Signal* signal) tcConnectptr.p->operation = readLogwordExec(signal); tcConnectptr.p->totSendlenAi = readLogwordExec(signal); tcConnectptr.p->primKeyLen = readLogwordExec(signal); + tcConnectptr.p->m_row_id.m_page_no = readLogwordExec(signal); + tcConnectptr.p->m_row_id.m_page_idx = readLogwordExec(signal); }//if + + tcConnectptr.p->m_use_rowid = (tcConnectptr.p->operation == ZINSERT); }//Dblqh::readLogHeader() /* ------------------------------------------------------------------------- */ @@ -16753,7 +17482,7 @@ Uint32 Dblqh::returnExecLog(Signal* signal) clfLogFilePtr.i = logPartPtr.p->execSrExecLogFile; ptrCheckGuard(clfLogFilePtr, clogFileFileSize, logFileRecord); clfLogFilePtr.p->logFileStatus = LogFileRecord::CLOSING_EXEC_LOG; - closeFile(signal, clfLogFilePtr); + closeFile(signal, clfLogFilePtr, __LINE__); result = ZCLOSE_FILE; }//if }//if @@ -17352,9 +18081,8 @@ Dblqh::execDUMP_STATE_ORD(Signal* signal) sp.p->scanAiLength, sp.p->m_curr_batch_size_rows, sp.p->m_max_batch_size_rows); - infoEvent(" errCnt=%d, localFid=%d, schV=%d", + infoEvent(" errCnt=%d, schV=%d", sp.p->scanErrorCounter, - sp.p->scanLocalFragid, sp.p->scanSchemaVersion); infoEvent(" stpid=%d, flag=%d, lhold=%d, lmode=%d, num=%d", sp.p->scanStoredProcId, @@ -17535,7 +18263,6 @@ Dblqh::execDUMP_STATE_ORD(Signal* signal) << " m_max_batch_size_rows="<< TscanPtr.p->m_max_batch_size_rows << " scanErrorCounter="<<TscanPtr.p->scanErrorCounter - << " scanLocalFragid="<<TscanPtr.p->scanLocalFragid << endl; ndbout << " 
scanSchemaVersion="<<TscanPtr.p->scanSchemaVersion << " scanStoredProcId="<<TscanPtr.p->scanStoredProcId diff --git a/storage/ndb/src/kernel/blocks/dblqh/redoLogReader/records.cpp b/storage/ndb/src/kernel/blocks/dblqh/redoLogReader/records.cpp index 6eadefe5df5..0456c5d75e4 100644 --- a/storage/ndb/src/kernel/blocks/dblqh/redoLogReader/records.cpp +++ b/storage/ndb/src/kernel/blocks/dblqh/redoLogReader/records.cpp @@ -128,7 +128,7 @@ bool PrepareOperationRecord::check() { if (m_operationType == 3 && m_attributeLength != 0) return false; - if (m_logRecordSize != (m_attributeLength + m_keyLength + 6)) + if (m_logRecordSize != (m_attributeLength + m_keyLength + 8)) return false; return true; @@ -165,6 +165,8 @@ NdbOut& operator<<(NdbOut& no, const PrepareOperationRecord& por) { default: printOut("operationType:", por.m_operationType); } + printOut("page_no: ", por.m_page_no); + printOut("page_idx: ", por.m_page_idx); printOut("attributeLength:", por.m_attributeLength); printOut("keyLength:", por.m_keyLength); diff --git a/storage/ndb/src/kernel/blocks/dblqh/redoLogReader/records.hpp b/storage/ndb/src/kernel/blocks/dblqh/redoLogReader/records.hpp index 06bf7a85d53..2792a81b058 100644 --- a/storage/ndb/src/kernel/blocks/dblqh/redoLogReader/records.hpp +++ b/storage/ndb/src/kernel/blocks/dblqh/redoLogReader/records.hpp @@ -92,6 +92,8 @@ protected: Uint32 m_operationType; // 0 READ, 1 UPDATE, 2 INSERT, 3 DELETE Uint32 m_attributeLength; Uint32 m_keyLength; + Uint32 m_page_no; + Uint32 m_page_idx; Uint32 *m_keyInfo; // In this order Uint32 *m_attrInfo;// In this order }; diff --git a/storage/ndb/src/kernel/blocks/dbtc/Dbtc.hpp b/storage/ndb/src/kernel/blocks/dbtc/Dbtc.hpp index 5e9f6f6e6ba..da4629b5e79 100644 --- a/storage/ndb/src/kernel/blocks/dbtc/Dbtc.hpp +++ b/storage/ndb/src/kernel/blocks/dbtc/Dbtc.hpp @@ -892,11 +892,7 @@ public: */ Uint8 opExec; - /** - * LOCK TYPE OF OPERATION IF READ OPERATION - * 0 = READ LOCK, 1 = WRITE LOCK - */ - Uint8 opLock; + Uint8 unused; 
/** * IS THE OPERATION A SIMPLE TRANSACTION diff --git a/storage/ndb/src/kernel/blocks/dbtc/DbtcMain.cpp b/storage/ndb/src/kernel/blocks/dbtc/DbtcMain.cpp index 2645075abd9..b6c3ae82144 100644 --- a/storage/ndb/src/kernel/blocks/dbtc/DbtcMain.cpp +++ b/storage/ndb/src/kernel/blocks/dbtc/DbtcMain.cpp @@ -2807,17 +2807,9 @@ void Dbtc::execTCKEYREQ(Signal* signal) regCachePtr->attrinfo15[2] = Tdata4; regCachePtr->attrinfo15[3] = Tdata5; - if (TOperationType == ZREAD) { + if (TOperationType == ZREAD || TOperationType == ZREAD_EX) { Uint32 TreadCount = c_counters.creadCount; jam(); - regCachePtr->opLock = 0; - c_counters.creadCount = TreadCount + 1; - } else if(TOperationType == ZREAD_EX){ - Uint32 TreadCount = c_counters.creadCount; - jam(); - TOperationType = ZREAD; - regTcPtr->operation = ZREAD; - regCachePtr->opLock = ZUPDATE; c_counters.creadCount = TreadCount + 1; } else { if(regApiPtr->commitAckMarker == RNIL){ @@ -2851,24 +2843,10 @@ void Dbtc::execTCKEYREQ(Signal* signal) c_counters.cwriteCount = TwriteCount + 1; switch (TOperationType) { case ZUPDATE: - jam(); - if (TattrLen == 0) { - //TCKEY_abort(signal, 5); - //return; - }//if - /*---------------------------------------------------------------------*/ - // The missing break is intentional since we also want to set the opLock - // variable also for updates - /*---------------------------------------------------------------------*/ case ZINSERT: case ZDELETE: - jam(); - regCachePtr->opLock = TOperationType; - break; case ZWRITE: jam(); - // A write operation is originally an insert operation. 
- regCachePtr->opLock = ZINSERT; break; default: TCKEY_abort(signal, 9); @@ -3039,7 +3017,7 @@ void Dbtc::tckeyreq050Lab(Signal* signal) tnoOfStandby = (tnodeinfo >> 8) & 3; regCachePtr->fragmentDistributionKey = (tnodeinfo >> 16) & 255; - if (Toperation == ZREAD) { + if (Toperation == ZREAD || Toperation == ZREAD_EX) { if (Tdirty == 1) { jam(); /*-------------------------------------------------------------*/ @@ -3168,6 +3146,7 @@ void Dbtc::sendlqhkeyreq(Signal* signal, TcConnectRecord * const regTcPtr = tcConnectptr.p; ApiConnectRecord * const regApiPtr = apiConnectptr.p; CacheRecord * const regCachePtr = cachePtr.p; + Uint32 version = getNodeInfo(refToNode(TBRef)).m_version; #ifdef ERROR_INSERT if (ERROR_INSERTED(8002)) { systemErrorLab(signal, __LINE__); @@ -3207,7 +3186,12 @@ void Dbtc::sendlqhkeyreq(Signal* signal, Tdata10 = 0; LqhKeyReq::setKeyLen(Tdata10, regCachePtr->keylen); LqhKeyReq::setLastReplicaNo(Tdata10, regTcPtr->lastReplicaNo); - LqhKeyReq::setLockType(Tdata10, regCachePtr->opLock); + if (unlikely(version < NDBD_ROWID_VERSION)) + { + Uint32 op = regTcPtr->operation; + Uint32 lock = op == ZREAD_EX ? ZUPDATE : op == ZWRITE ? 
ZINSERT : op; + LqhKeyReq::setLockType(Tdata10, lock); + } /* ---------------------------------------------------------------------- */ // Indicate Application Reference is present in bit 15 /* ---------------------------------------------------------------------- */ diff --git a/storage/ndb/src/kernel/blocks/dbtup/Dbtup.hpp b/storage/ndb/src/kernel/blocks/dbtup/Dbtup.hpp index a9fef297bef..fa898229322 100644 --- a/storage/ndb/src/kernel/blocks/dbtup/Dbtup.hpp +++ b/storage/ndb/src/kernel/blocks/dbtup/Dbtup.hpp @@ -177,7 +177,7 @@ inline const Uint32* ALIGN_WORD(const void* ptr) #define ZINSERT_ERROR 630 #define ZINVALID_CHAR_FORMAT 744 - +#define ZROWID_ALLOCATED 899 /* SOME WORD POSITIONS OF FIELDS IN SOME HEADERS */ @@ -235,8 +235,9 @@ inline const Uint32* ALIGN_WORD(const void* ptr) #define ZREL_FRAG 7 #define ZREPORT_MEMORY_USAGE 8 #define ZBUILD_INDEX 9 -#define ZFREE_EXTENT 10 -#define ZUNMAP_PAGES 11 +#define ZTUP_SCAN 10 +#define ZFREE_EXTENT 11 +#define ZUNMAP_PAGES 12 #define ZSCAN_PROCEDURE 0 #define ZCOPY_PROCEDURE 2 @@ -336,40 +337,106 @@ struct Fragoperrec { }; typedef Ptr<Fragoperrec> FragoperrecPtr; - // Position for use by scan - struct PagePos { + + typedef Tup_page Page; + typedef Ptr<Page> PagePtr; + + // Scan position + struct ScanPos { + enum Get { + Get_undef = 0, + Get_next_page, + Get_page, + Get_next_page_mm, + Get_page_mm, + Get_next_page_dd, + Get_page_dd, + Get_next_tuple, + Get_tuple, + Get_next_tuple_fs, + Get_tuple_fs + }; + Get m_get; // entry point in scanNext + Local_key m_key; // scan position pointer MM or DD + Page* m_page; // scanned MM or DD (cache) page + Local_key m_key_mm; // MM local key returned + Uint32 m_realpid_mm; // MM real page id Uint32 m_extent_info_ptr_i; - Local_key m_key; - bool m_match; }; - // Tup scan op (compare Dbtux::ScanOp) + // Scan Lock + struct ScanLock { + Uint32 m_accLockOp; + union { + Uint32 nextPool; + Uint32 nextList; + }; + Uint32 prevList; + }; + typedef Ptr<ScanLock> ScanLockPtr; + 
ArrayPool<ScanLock> c_scanLockPool; + + // Tup scan, similar to Tux scan. Later some of this could + // be moved to common superclass. struct ScanOp { - ScanOp() {} - enum { // state + ScanOp() : + m_state(Undef), + m_bits(0), + m_userPtr(RNIL), + m_userRef(RNIL), + m_tableId(RNIL), + m_fragId(~(Uint32)0), + m_fragPtrI(RNIL), + m_transId1(0), + m_transId2(0), + m_savePointId(0), + m_accLockOp(RNIL) + {} + + enum State { Undef = 0, First = 1, // before first entry - Locked = 4, // at current entry (no lock needed) + Current = 2, // at current before locking + Blocked = 3, // at current waiting for ACC lock + Locked = 4, // at current and locked or no lock needed Next = 5, // looking for next extry Last = 6, // after last entry + Aborting = 7, // lock wait at scan close Invalid = 9 // cannot return REF to LQH currently }; Uint16 m_state; - STATIC_CONST( SCAN_DD = 0x1 ); - STATIC_CONST( SCAN_VS = 0x2 ); - STATIC_CONST( SCAN_LCP = 0x4 ); - STATIC_CONST( SCAN_DD_VS = 0x8 ); + enum Bits { + SCAN_DD = 0x01, // scan disk pages + SCAN_VS = 0x02, // page format is var size + SCAN_LCP = 0x04, // LCP mem page scan + SCAN_LOCK_SH = 0x10, // lock mode shared + SCAN_LOCK_EX = 0x20, // lock mode exclusive + SCAN_LOCK_WAIT = 0x40, // lock wait + // any lock mode + SCAN_LOCK = SCAN_LOCK_SH | SCAN_LOCK_EX, + SCAN_NR = 0x80 // Node recovery scan + }; Uint16 m_bits; Uint32 m_userPtr; // scanptr.i in LQH Uint32 m_userRef; Uint32 m_tableId; - Uint32 m_fragId; // "base" fragment id + Uint32 m_fragId; Uint32 m_fragPtrI; Uint32 m_transId1; Uint32 m_transId2; - PagePos m_scanPos; + union { + Uint32 m_savePointId; + Uint32 m_scanGCI; + }; + // lock waited for or obtained and not yet passed to LQH + Uint32 m_accLockOp; + + ScanPos m_scanPos; + + DLFifoList<ScanLock>::Head m_accLockOps; + union { Uint32 nextPool; Uint32 nextList; @@ -379,8 +446,18 @@ typedef Ptr<Fragoperrec> FragoperrecPtr; typedef Ptr<ScanOp> ScanOpPtr; ArrayPool<ScanOp> c_scanOpPool; - typedef Tup_page Page; - typedef 
Ptr<Page> PagePtr; + void scanReply(Signal*, ScanOpPtr scanPtr); + void scanFirst(Signal*, ScanOpPtr scanPtr); + bool scanNext(Signal*, ScanOpPtr scanPtr); + void scanCont(Signal*, ScanOpPtr scanPtr); + void disk_page_tup_scan_callback(Signal*, Uint32 scanPtrI, Uint32 page_i); + void scanClose(Signal*, ScanOpPtr scanPtr); + void addAccLockOp(ScanOp& scan, Uint32 accLockOp); + void removeAccLockOp(ScanOp& scan, Uint32 accLockOp); + void releaseScanOp(ScanOpPtr& scanPtr); + + // for md5 of key (could maybe reuse existing temp buffer) + Uint64 c_dataBuffer[ZWORDS_ON_PAGE/2 + 1]; struct Page_request { @@ -410,6 +487,7 @@ typedef Ptr<Fragoperrec> FragoperrecPtr; struct Extent_info : public Extent_list_t { + Uint32 m_first_page_no; Local_key m_key; Uint32 m_free_space; Uint32 m_free_matrix_pos; @@ -440,6 +518,7 @@ typedef Ptr<Fragoperrec> FragoperrecPtr; Disk_alloc_info() {} Disk_alloc_info(const Tablerec* tabPtrP, Uint32 extent_size_in_pages); + Uint32 m_extent_size; /** * Disk allocation @@ -510,17 +589,18 @@ struct Fragrecord { Uint32 currentPageRange; Uint32 rootPageRange; Uint32 noOfPages; - Uint32 emptyPrimPage; - - Uint32 thFreeFirst; + DLList<Page>::Head emptyPrimPage; // allocated pages (not init) + DLList<Page>::Head thFreeFirst; // pages with atleast 1 free record + SLList<Page>::Head m_empty_pages; // Empty pages not in logical/physical map + Uint32 m_lcp_scan_op; State fragStatus; Uint32 fragTableId; Uint32 fragmentId; Uint32 nextfreefrag; - Uint32 free_var_page_array[MAX_FREE_LIST]; - + DLList<Page>::Head free_var_page_array[MAX_FREE_LIST]; + DLList<ScanOp>::Head m_scanList; bool m_undo_complete; @@ -530,11 +610,6 @@ struct Fragrecord { }; typedef Ptr<Fragrecord> FragrecordPtr; - void scanFirst(Signal* signal, Fragrecord*, ScanOpPtr scanPtr); - void scanNext(Signal* signal, Fragrecord*, ScanOpPtr scanPtr); - void scanClose(Signal* signal, ScanOpPtr scanPtr); - void releaseScanOp(ScanOpPtr& scanPtr); - struct Operationrec { /* @@ -796,7 +871,12 @@ 
ArrayPool<TupTriggerData> c_triggerPool; Uint32 tabDescriptor; Uint32 m_real_order_descriptor; - bool checksumIndicator; + enum Bits + { + TR_Checksum = 0x1, // Need to be 1 + TR_RowGCI = 0x2 + }; + Uint16 m_bits; Uint16 total_rec_size; // Max total size for entire tuple in words /** @@ -1105,7 +1185,7 @@ typedef Ptr<HostBuffer> HostBufferPtr; STATIC_CONST( MM_GROWN = 0x00400000 ); // Has MM part grown STATIC_CONST( FREE = 0x00800000 ); // On free list of page STATIC_CONST( LCP_SKIP = 0x01000000 ); // Should not be returned in LCP - + Uint32 get_tuple_version() const { return m_header_bits & TUP_VERSION_MASK; } @@ -1138,6 +1218,16 @@ typedef Ptr<HostBuffer> HostBufferPtr; const Uint32* get_disk_ref_ptr(const Tablerec* tabPtrP) const { return m_data + tabPtrP->m_offsets[MM].m_disk_ref_offset; } + + Uint32 *get_mm_gci(const Tablerec* tabPtrP){ + assert(tabPtrP->m_bits & Tablerec::TR_RowGCI); + return m_data + (tabPtrP->m_bits & Tablerec::TR_Checksum); + } + + Uint32 *get_dd_gci(const Tablerec* tabPtrP, Uint32 mm){ + assert(tabPtrP->m_bits & Tablerec::TR_RowGCI); + return m_data; + } }; struct KeyReqStruct { @@ -1179,13 +1269,15 @@ struct KeyReqStruct { } m_var_data[2]; Tuple_header *m_disk_ptr; - Page* m_page_ptr_p; - Var_page* m_varpart_page_ptr_p;// could be same as m_page_ptr_p + PagePtr m_page_ptr; + PagePtr m_varpart_page_ptr; // could be same as m_page_ptr_p PagePtr m_disk_page_ptr; // - + Local_key m_row_id; + bool dirty_op; bool interpreted_exec; bool last_row; + bool m_use_rowid; Signal* signal; Uint32 no_fired_triggers; @@ -1290,8 +1382,17 @@ public: int load_diskpage_scan(Signal*, Uint32 opRec, Uint32 fragPtrI, Uint32 local_key, Uint32 flags); + int alloc_page(Tablerec*, Fragrecord*, PagePtr*,Uint32 page_no); + void start_restore_lcp(Uint32 tableId, Uint32 fragmentId); void complete_restore_lcp(Uint32 tableId, Uint32 fragmentId); + + int nr_read_pk(Uint32 fragPtr, const Local_key*, Uint32* dataOut, bool&copy); + int nr_update_gci(Uint32 fragPtr, const
Local_key*, Uint32 gci); + int nr_delete(Signal*, Uint32, Uint32 fragPtr, const Local_key*, Uint32 gci); + + void nr_delete_page_callback(Signal*, Uint32 op, Uint32 page); + void nr_delete_logbuffer_callback(Signal*, Uint32 op, Uint32 page); private: BLOCK_DEFINES(Dbtup); @@ -1333,6 +1434,9 @@ private: void execACC_SCANREQ(Signal* signal); void execNEXT_SCANREQ(Signal* signal); void execACC_CHECK_SCAN(Signal* signal); + void execACCKEYCONF(Signal* signal); + void execACCKEYREF(Signal* signal); + void execACC_ABORTCONF(Signal* signal); //------------------------------------------------------------------ //------------------------------------------------------------------ @@ -1542,7 +1646,7 @@ private: void handleATTRINFOforTUPKEYREQ(Signal* signal, const Uint32* data, Uint32 length, - Operationrec * const regOperPtr); + Operationrec * regOperPtr); // ***************************************************************** // Setting up the environment for reads, inserts, updates and deletes. @@ -1550,16 +1654,16 @@ private: //------------------------------------------------------------------ //------------------------------------------------------------------ int handleReadReq(Signal* signal, - Operationrec* const regOperPtr, - Tablerec* const regTabPtr, + Operationrec* regOperPtr, + Tablerec* regTabPtr, KeyReqStruct* req_struct); //------------------------------------------------------------------ //------------------------------------------------------------------ int handleUpdateReq(Signal* signal, - Operationrec* const regOperPtr, - Fragrecord* const regFragPtr, - Tablerec* const regTabPtr, + Operationrec* regOperPtr, + Fragrecord* regFragPtr, + Tablerec* regTabPtr, KeyReqStruct* req_struct, bool disk); @@ -1568,23 +1672,23 @@ private: int handleInsertReq(Signal* signal, Ptr<Operationrec> regOperPtr, Ptr<Fragrecord>, - Tablerec* const regTabPtr, + Tablerec* regTabPtr, KeyReqStruct* req_struct); //------------------------------------------------------------------ 
//------------------------------------------------------------------ int handleDeleteReq(Signal* signal, - Operationrec* const regOperPtr, - Fragrecord* const regFragPtr, - Tablerec* const regTabPtr, + Operationrec* regOperPtr, + Fragrecord* regFragPtr, + Tablerec* regTabPtr, KeyReqStruct* req_struct); //------------------------------------------------------------------ //------------------------------------------------------------------ int updateStartLab(Signal* signal, - Operationrec* const regOperPtr, - Fragrecord* const regFragPtr, - Tablerec* const regTabPtr, + Operationrec* regOperPtr, + Fragrecord* regFragPtr, + Tablerec* regTabPtr, KeyReqStruct* req_struct); // ***************************************************************** @@ -1616,19 +1720,19 @@ private: void sendReadAttrinfo(Signal* signal, KeyReqStruct *req_struct, Uint32 TnoOfData, - const Operationrec * const regOperPtr); + const Operationrec * regOperPtr); //------------------------------------------------------------------ //------------------------------------------------------------------ void sendLogAttrinfo(Signal* signal, Uint32 TlogSize, - Operationrec * const regOperPtr); + Operationrec * regOperPtr); //------------------------------------------------------------------ //------------------------------------------------------------------ void sendTUPKEYCONF(Signal* signal, KeyReqStruct *req_struct, - Operationrec * const regOperPtr); + Operationrec * regOperPtr); //------------------------------------------------------------------ //------------------------------------------------------------------ @@ -1843,7 +1947,7 @@ private: //------------------------------------------------------------------ //------------------------------------------------------------------ - void setUpQueryRoutines(Tablerec* const regTabPtr); + void setUpQueryRoutines(Tablerec* regTabPtr); // ***************************************************************** // Service methods. 
@@ -1863,7 +1967,7 @@ private: //------------------------------------------------------------------ //------------------------------------------------------------------ - void copyAttrinfo(Operationrec * const regOperPtr, Uint32* inBuffer); + void copyAttrinfo(Operationrec * regOperPtr, Uint32* inBuffer); //------------------------------------------------------------------ //------------------------------------------------------------------ @@ -1875,7 +1979,7 @@ private: //------------------------------------------------------------------ //------------------------------------------------------------------ - int initStoredOperationrec(Operationrec* const regOperPtr, + int initStoredOperationrec(Operationrec* regOperPtr, KeyReqStruct* req_struct, Uint32 storedId); @@ -1905,57 +2009,57 @@ private: void checkImmediateTriggersAfterInsert(KeyReqStruct *req_struct, - Operationrec* const regOperPtr, - Tablerec* const tablePtr); + Operationrec* regOperPtr, + Tablerec* tablePtr); void checkImmediateTriggersAfterUpdate(KeyReqStruct *req_struct, - Operationrec* const regOperPtr, - Tablerec* const tablePtr); + Operationrec* regOperPtr, + Tablerec* tablePtr); void checkImmediateTriggersAfterDelete(KeyReqStruct *req_struct, - Operationrec* const regOperPtr, - Tablerec* const tablePtr); + Operationrec* regOperPtr, + Tablerec* tablePtr); #if 0 void checkDeferredTriggers(Signal* signal, - Operationrec* const regOperPtr, - Tablerec* const regTablePtr); + Operationrec* regOperPtr, + Tablerec* regTablePtr); #endif void checkDetachedTriggers(KeyReqStruct *req_struct, - Operationrec* const regOperPtr, - Tablerec* const regTablePtr); + Operationrec* regOperPtr, + Tablerec* regTablePtr); void fireImmediateTriggers(KeyReqStruct *req_struct, ArrayList<TupTriggerData>& triggerList, - Operationrec* const regOperPtr); + Operationrec* regOperPtr); void fireDeferredTriggers(KeyReqStruct *req_struct, ArrayList<TupTriggerData>& triggerList, - Operationrec* const regOperPtr); + Operationrec* 
regOperPtr); void fireDetachedTriggers(KeyReqStruct *req_struct, ArrayList<TupTriggerData>& triggerList, - Operationrec* const regOperPtr); + Operationrec* regOperPtr); void executeTriggers(KeyReqStruct *req_struct, ArrayList<TupTriggerData>& triggerList, - Operationrec* const regOperPtr); + Operationrec* regOperPtr); void executeTrigger(KeyReqStruct *req_struct, - TupTriggerData* const trigPtr, - Operationrec* const regOperPtr); - - bool readTriggerInfo(TupTriggerData* const trigPtr, - Operationrec* const regOperPtr, - KeyReqStruct * const req_struct, - Fragrecord* const regFragPtr, - Uint32* const keyBuffer, + TupTriggerData* trigPtr, + Operationrec* regOperPtr); + + bool readTriggerInfo(TupTriggerData* trigPtr, + Operationrec* regOperPtr, + KeyReqStruct * req_struct, + Fragrecord* regFragPtr, + Uint32* keyBuffer, Uint32& noPrimKey, - Uint32* const afterBuffer, + Uint32* afterBuffer, Uint32& noAfterWords, - Uint32* const beforeBuffer, + Uint32* beforeBuffer, Uint32& noBeforeWords); void sendTrigAttrInfo(Signal* signal, @@ -1970,8 +2074,8 @@ private: void sendFireTrigOrd(Signal* signal, KeyReqStruct *req_struct, - Operationrec * const regOperPtr, - TupTriggerData* const trigPtr, + Operationrec * regOperPtr, + TupTriggerData* trigPtr, Uint32 fragmentId, Uint32 noPrimKeySignals, Uint32 noBeforeSignals, @@ -1982,19 +2086,19 @@ private: // these set terrorCode and return non-zero on error int executeTuxInsertTriggers(Signal* signal, - Operationrec* const regOperPtr, - Fragrecord* const regFragPtr, - Tablerec* const regTabPtr); + Operationrec* regOperPtr, + Fragrecord* regFragPtr, + Tablerec* regTabPtr); int executeTuxUpdateTriggers(Signal* signal, - Operationrec* const regOperPtr, - Fragrecord* const regFragPtr, - Tablerec* const regTabPtr); + Operationrec* regOperPtr, + Fragrecord* regFragPtr, + Tablerec* regTabPtr); int executeTuxDeleteTriggers(Signal* signal, - Operationrec* const regOperPtr, - Fragrecord* const regFragPtr, - Tablerec* const regTabPtr); + 
Operationrec* regOperPtr, + Fragrecord* regFragPtr, + Tablerec* regTabPtr); int addTuxEntries(Signal* signal, Operationrec* regOperPtr, @@ -2004,16 +2108,15 @@ private: void executeTuxCommitTriggers(Signal* signal, Operationrec* regOperPtr, - Fragrecord* const regFragPtr, - Tablerec* const regTabPtr); + Fragrecord* regFragPtr, + Tablerec* regTabPtr); void executeTuxAbortTriggers(Signal* signal, Operationrec* regOperPtr, - Fragrecord* const regFragPtr, - Tablerec* const regTabPtr); + Fragrecord* regFragPtr, + Tablerec* regTabPtr); void removeTuxEntries(Signal* signal, - Operationrec* regOperPtr, Tablerec* regTabPtr); // ***************************************************************** @@ -2092,7 +2195,7 @@ private: //------------------------------------------------------------------ #if 0 - void checkPages(Fragrecord* const regFragPtr); + void checkPages(Fragrecord* regFragPtr); #endif Uint32 convert_byte_to_word_size(Uint32 byte_size) { @@ -2106,59 +2209,53 @@ private: void prepare_initial_insert(KeyReqStruct*, Operationrec*, Tablerec*); void fix_disk_insert_no_mem_insert(KeyReqStruct*, Operationrec*, Tablerec*); void setup_fixed_part(KeyReqStruct* req_struct, - Operationrec* const regOperPtr, - Tablerec* const regTabPtr); + Operationrec* regOperPtr, + Tablerec* regTabPtr); void send_TUPKEYREF(Signal* signal, - Operationrec* const regOperPtr); + Operationrec* regOperPtr); void early_tupkey_error(Signal* signal); void printoutTuplePage(Uint32 fragid, Uint32 pageid, Uint32 printLimit); bool checkUpdateOfPrimaryKey(KeyReqStruct *req_struct, Uint32* updateBuffer, - Tablerec* const regTabPtr); + Tablerec* regTabPtr); - void setNullBits(Uint32*, Tablerec* const regTabPtr); + void setNullBits(Uint32*, Tablerec* regTabPtr); bool checkNullAttributes(KeyReqStruct * const, Tablerec* const); bool setup_read(KeyReqStruct* req_struct, - Operationrec* const regOperPtr, - Fragrecord* const regFragPtr, - Tablerec* const regTabPtr, + Operationrec* regOperPtr, + Fragrecord* 
regFragPtr, + Tablerec* regTabPtr, bool disk); - bool getPageLastCommitted(Operationrec* const regOperPtr, - Operationrec* const leaderOpPtr); - - bool getPageThroughSavePoint(Operationrec* const regOperPtr, - Operationrec* const leaderOpPtr); - - Uint32 calculateChecksum(Tuple_header*, Tablerec* const regTabPtr); - void setChecksum(Tuple_header*, Tablerec* const regTabPtr); + Uint32 calculateChecksum(Tuple_header*, Tablerec* regTabPtr); + void setChecksum(Tuple_header*, Tablerec* regTabPtr); void complexTrigger(Signal* signal, KeyReqStruct *req_struct, - Operationrec* const regOperPtr, - Fragrecord* const regFragPtr, - Tablerec* const regTabPtr); + Operationrec* regOperPtr, + Fragrecord* regFragPtr, + Tablerec* regTabPtr); - void setTupleStatesSetOpType(Operationrec* const regOperPtr, + void setTupleStatesSetOpType(Operationrec* regOperPtr, KeyReqStruct *req_struct, - Page* const pagePtr, + Page* pagePtr, Uint32& opType, OperationrecPtr& firstOpPtr); void findBeforeValueOperation(OperationrecPtr& befOpPtr, OperationrecPtr firstOpPtr); - void calculateChangeMask(Page* const PagePtr, - Tablerec* const regTabPtr, - KeyReqStruct * const req_struct); + void calculateChangeMask(Page* PagePtr, + Tablerec* regTabPtr, + KeyReqStruct * req_struct); void updateGcpId(KeyReqStruct *req_struct, - Operationrec* const regOperPtr, - Fragrecord* const regFragPtr, - Tablerec* const regTabPtr); + Operationrec* regOperPtr, + Fragrecord* regFragPtr, + Tablerec* regTabPtr); void setTupleStateOnPreviousOps(Uint32 prevOpIndex); void copyMem(Signal* signal, Uint32 sourceIndex, Uint32 destIndex); @@ -2170,14 +2267,14 @@ private: void updatePackedList(Signal* signal, Uint16 ahostIndex); void setUpDescriptorReferences(Uint32 descriptorReference, - Tablerec* const regTabPtr, + Tablerec* regTabPtr, const Uint32* offset); - void setUpKeyArray(Tablerec* const regTabPtr); - bool addfragtotab(Tablerec* const regTabPtr, Uint32 fragId, Uint32 fragIndex); - void deleteFragTab(Tablerec* const 
regTabPtr, Uint32 fragId); + void setUpKeyArray(Tablerec* regTabPtr); + bool addfragtotab(Tablerec* regTabPtr, Uint32 fragId, Uint32 fragIndex); + void deleteFragTab(Tablerec* regTabPtr, Uint32 fragId); void abortAddFragOp(Signal* signal); - void releaseTabDescr(Tablerec* const regTabPtr); - void getFragmentrec(FragrecordPtr& regFragPtr, Uint32 fragId, Tablerec* const regTabPtr); + void releaseTabDescr(Tablerec* regTabPtr); + void getFragmentrec(FragrecordPtr& regFragPtr, Uint32 fragId, Tablerec* regTabPtr); void initialiseRecordsLab(Signal* signal, Uint32 switchData, Uint32, Uint32); void initializeAttrbufrec(); @@ -2194,7 +2291,7 @@ private: void initializeTabDescr(); void initializeUndoPage(); - void initTab(Tablerec* const regTabPtr); + void initTab(Tablerec* regTabPtr); void startphase3Lab(Signal* signal, Uint32 config1, Uint32 config2); @@ -2204,17 +2301,17 @@ private: void fragrefuse3Lab(Signal* signal, FragoperrecPtr fragOperPtr, FragrecordPtr regFragPtr, - Tablerec* const regTabPtr, + Tablerec* regTabPtr, Uint32 fragId); void fragrefuse4Lab(Signal* signal, FragoperrecPtr fragOperPtr, FragrecordPtr regFragPtr, - Tablerec* const regTabPtr, + Tablerec* regTabPtr, Uint32 fragId); void addattrrefuseLab(Signal* signal, FragrecordPtr regFragPtr, FragoperrecPtr fragOperPtr, - Tablerec* const regTabPtr, + Tablerec* regTabPtr, Uint32 fragId); @@ -2291,24 +2388,25 @@ private: //------------------------------------------------------------------------------------------------------ // // Public methods - Uint32 getRealpid(Fragrecord* const regFragPtr, Uint32 logicalPageId); - Uint32 getNoOfPages(Fragrecord* const regFragPtr); + Uint32 getRealpid(Fragrecord* regFragPtr, Uint32 logicalPageId); + Uint32 getNoOfPages(Fragrecord* regFragPtr); void initPageRangeSize(Uint32 size); - bool insertPageRangeTab(Fragrecord* const regFragPtr, + bool insertPageRangeTab(Fragrecord* regFragPtr, Uint32 startPageId, Uint32 noPages); - void releaseFragPages(Fragrecord* const regFragPtr); - 
void initFragRange(Fragrecord* const regFragPtr); + void releaseFragPages(Fragrecord* regFragPtr); + void initFragRange(Fragrecord* regFragPtr); void initializePageRange(); - Uint32 getEmptyPage(Fragrecord* const regFragPtr); - Uint32 allocFragPages(Fragrecord* const regFragPtr, Uint32 noOfPagesAllocated); - + Uint32 getEmptyPage(Fragrecord* regFragPtr); + Uint32 allocFragPages(Fragrecord* regFragPtr, Uint32 noOfPagesAllocated); + Uint32 get_empty_var_page(Fragrecord* frag_ptr); + // Private methods - Uint32 leafPageRangeFull(Fragrecord* const regFragPtr, PageRangePtr currPageRangePtr); + Uint32 leafPageRangeFull(Fragrecord* regFragPtr, PageRangePtr currPageRangePtr); void releasePagerange(PageRangePtr regPRPtr); void seizePagerange(PageRangePtr& regPageRangePtr); void errorHandler(Uint32 errorCode); - void allocMoreFragPages(Fragrecord* const regFragPtr); + void allocMoreFragPages(Fragrecord* regFragPtr); // Private data Uint32 cfirstfreerange; @@ -2328,7 +2426,7 @@ private: // Private methods Uint32 get_alloc_page(Fragrecord* const, Uint32); - void update_free_page_list(Fragrecord* const, Var_page*); + void update_free_page_list(Fragrecord* const, Ptr<Page>); #if 0 Uint32 calc_free_list(const Tablerec* regTabPtr, Uint32 sz) const { @@ -2346,10 +2444,11 @@ private: //--------------------------------------------------------------- // // Public methods - Uint32* alloc_var_rec(Fragrecord*const, Tablerec*const, Uint32, Local_key*, - Uint32*, Uint32 base); - void free_var_part(Fragrecord*, Tablerec*, Var_part_ref, Uint32 chain); - void free_var_part(Fragrecord*, Tablerec*, Local_key*, Var_page*, Uint32 chain); + Uint32* alloc_var_rec(Fragrecord*, Tablerec*, Uint32, Local_key*, Uint32*); + void free_var_rec(Fragrecord*, Tablerec*, Local_key*, Ptr<Page>); + Uint32* alloc_var_part(Fragrecord*, Tablerec*, Uint32, Local_key*); + int realloc_var_part(Fragrecord*, Tablerec*, + PagePtr, Var_part_ref*, Uint32, Uint32); void validate_page(Tablerec*, Var_page* page); @@ -2357,15 
+2456,18 @@ private: Uint32*); void free_fix_rec(Fragrecord*, Tablerec*, Local_key*, Fix_page*); + Uint32* alloc_fix_rowid(Fragrecord*, Tablerec*, Local_key*, Uint32 *); + Uint32* alloc_var_rowid(Fragrecord*, Tablerec*, Uint32, Local_key*, Uint32*); // Private methods - void convertThPage(Uint32 Tupheadsize, - Fix_page* const regPagePtr); + void convertThPage(Fix_page* regPagePtr, + Tablerec*, + Uint32 mm); /** * Return offset */ - Uint32 alloc_tuple_from_page(Fragrecord* const regFragPtr, - Fix_page* const regPagePtr); + Uint32 alloc_tuple_from_page(Fragrecord* regFragPtr, + Fix_page* regPagePtr); //--------------------------------------------------------------- // Temporary variables used for storing commonly used variables @@ -2398,8 +2500,7 @@ private: ArrayPool<Operationrec> c_operation_pool; - Page *cpage; - Uint32 cnoOfPage; + ArrayPool<Page> c_page_pool; Uint32 cnoOfAllocatedPages; Tablerec *tablerec; @@ -2437,9 +2538,6 @@ private: // Trigger variables Uint32 c_maxTriggersPerTable; - STATIC_CONST(MAX_PARALLELL_TUP_SRREQ = 2); - Uint32 c_sr_free_page_0; - Uint32 c_errorInsert4000TableId; Uint32 c_min_list_size[MAX_FREE_LIST + 1]; Uint32 c_max_list_size[MAX_FREE_LIST + 1]; @@ -2462,9 +2560,9 @@ private: bool disk); Uint32* get_ptr(Var_part_ref); - Uint32* get_ptr(Ptr<Var_page>*, Var_part_ref); + Uint32* get_ptr(PagePtr*, Var_part_ref); Uint32* get_ptr(PagePtr*, const Local_key*, const Tablerec*); - Uint32* get_ptr(PagePtr*, const Local_key*, const Tablerec*, Uint32 mm); + Uint32* get_dd_ptr(PagePtr*, const Local_key*, const Tablerec*); /** * prealloc space from disk @@ -2561,7 +2659,7 @@ private: #endif void fix_commit_order(OperationrecPtr); - void commit_operation(Signal*, Uint32, Tuple_header*, Page*, + void commit_operation(Signal*, Uint32, Tuple_header*, PagePtr, Operationrec*, Fragrecord*, Tablerec*); void dealloc_tuple(Signal* signal, Uint32, Page*, Tuple_header*, @@ -2570,8 +2668,8 @@ private: int handle_size_change_after_update(KeyReqStruct* 
req_struct, Tuple_header* org, Operationrec*, - Fragrecord* const regFragPtr, - Tablerec* const regTabPtr, + Fragrecord* regFragPtr, + Tablerec* regTabPtr, Uint32 sizes[4]); /** @@ -2581,6 +2679,7 @@ private: void prepare_read(KeyReqStruct*, Tablerec* const, bool disk); }; +#if 0 inline Uint32 Dbtup::get_frag_page_id(Uint32 real_page_id) @@ -2590,17 +2689,18 @@ Dbtup::get_frag_page_id(Uint32 real_page_id) ptrCheckGuard(real_page_ptr, cnoOfPage, cpage); return real_page_ptr.p->frag_page_id; } +#endif inline Dbtup::TransState -Dbtup::get_trans_state(Operationrec * const regOperPtr) +Dbtup::get_trans_state(Operationrec * regOperPtr) { return (Dbtup::TransState)regOperPtr->op_struct.trans_state; } inline void -Dbtup::set_trans_state(Operationrec* const regOperPtr, +Dbtup::set_trans_state(Operationrec* regOperPtr, Dbtup::TransState trans_state) { regOperPtr->op_struct.trans_state= (Uint32)trans_state; @@ -2608,14 +2708,14 @@ Dbtup::set_trans_state(Operationrec* const regOperPtr, inline Dbtup::TupleState -Dbtup::get_tuple_state(Operationrec * const regOperPtr) +Dbtup::get_tuple_state(Operationrec * regOperPtr) { return (Dbtup::TupleState)regOperPtr->op_struct.tuple_state; } inline void -Dbtup::set_tuple_state(Operationrec* const regOperPtr, +Dbtup::set_tuple_state(Operationrec* regOperPtr, Dbtup::TupleState tuple_state) { regOperPtr->op_struct.tuple_state= (Uint32)tuple_state; @@ -2631,14 +2731,14 @@ Dbtup::decr_tup_version(Uint32 tup_version) inline Dbtup::ChangeMaskState -Dbtup::get_change_mask_state(Operationrec * const regOperPtr) +Dbtup::get_change_mask_state(Operationrec * regOperPtr) { return (Dbtup::ChangeMaskState)regOperPtr->op_struct.change_mask_state; } inline void -Dbtup::set_change_mask_state(Operationrec * const regOperPtr, +Dbtup::set_change_mask_state(Operationrec * regOperPtr, ChangeMaskState new_state) { regOperPtr->op_struct.change_mask_state= (Uint32)new_state; @@ -2646,8 +2746,8 @@ Dbtup::set_change_mask_state(Operationrec * const regOperPtr, inline 
void -Dbtup::update_change_mask_info(KeyReqStruct * const req_struct, - Operationrec * const regOperPtr) +Dbtup::update_change_mask_info(KeyReqStruct * req_struct, + Operationrec * regOperPtr) { //Save change mask if (req_struct->max_attr_id_updated == 0) { @@ -2667,19 +2767,19 @@ inline Uint32* Dbtup::get_ptr(Var_part_ref ref) { - Ptr<Var_page> tmp; + Ptr<Page> tmp; return get_ptr(&tmp, ref); } inline Uint32* -Dbtup::get_ptr(Ptr<Var_page>* pagePtr, Var_part_ref ref) +Dbtup::get_ptr(Ptr<Page>* pagePtr, Var_part_ref ref) { PagePtr tmp; Uint32 page_idx= ref.m_ref & MAX_TUPLES_PER_PAGE; tmp.i= ref.m_ref >> MAX_TUPLES_BITS; - ptrCheckGuard(tmp, cnoOfPage, cpage); + c_page_pool.getPtr(tmp); memcpy(pagePtr, &tmp, sizeof(tmp)); return ((Var_page*)tmp.p)->get_ptr(page_idx); } @@ -2691,38 +2791,28 @@ Dbtup::get_ptr(PagePtr* pagePtr, { PagePtr tmp; tmp.i= key->m_page_no; - ptrCheckGuard(tmp, cnoOfPage, cpage); + c_page_pool.getPtr(tmp); memcpy(pagePtr, &tmp, sizeof(tmp)); - if(regTabPtr->m_attributes[MM].m_no_of_varsize) - return ((Var_page*)tmp.p)->get_ptr(key->m_page_idx); - else - return ((Fix_page*)tmp.p)-> - get_ptr(key->m_page_idx, regTabPtr->m_offsets[MM].m_fix_header_size); + return ((Fix_page*)tmp.p)-> + get_ptr(key->m_page_idx, regTabPtr->m_offsets[MM].m_fix_header_size); } inline Uint32* -Dbtup::get_ptr(PagePtr* pagePtr, - const Local_key* key, const Tablerec* regTabPtr, Uint32 mm) +Dbtup::get_dd_ptr(PagePtr* pagePtr, + const Local_key* key, const Tablerec* regTabPtr) { PagePtr tmp; tmp.i= key->m_page_no; - if(mm == MM) - { - ptrCheckGuard(tmp, cnoOfPage, cpage); - } - else - { - tmp.p= (Page*)m_global_page_pool.getPtr(tmp.i); - } + tmp.p= (Page*)m_global_page_pool.getPtr(tmp.i); memcpy(pagePtr, &tmp, sizeof(tmp)); - if(regTabPtr->m_attributes[mm].m_no_of_varsize) + if(regTabPtr->m_attributes[DD].m_no_of_varsize) return ((Var_page*)tmp.p)->get_ptr(key->m_page_idx); else return ((Fix_page*)tmp.p)-> - get_ptr(key->m_page_idx, 
regTabPtr->m_offsets[mm].m_fix_header_size); + get_ptr(key->m_page_idx, regTabPtr->m_offsets[DD].m_fix_header_size); } NdbOut& diff --git a/storage/ndb/src/kernel/blocks/dbtup/DbtupAbort.cpp b/storage/ndb/src/kernel/blocks/dbtup/DbtupAbort.cpp index bb6d171b202..77b125b7403 100644 --- a/storage/ndb/src/kernel/blocks/dbtup/DbtupAbort.cpp +++ b/storage/ndb/src/kernel/blocks/dbtup/DbtupAbort.cpp @@ -132,45 +132,48 @@ void Dbtup::execTUP_ABORTREQ(Signal* signal) disk_page_abort_prealloc(signal, regFragPtr.p, &key, key.m_page_idx); } - Uint32 bits= copy->m_header_bits; + Uint32 bits= tuple_ptr->m_header_bits; + Uint32 copy_bits= copy->m_header_bits; if(! (bits & Tuple_header::ALLOC)) { - if(bits & Tuple_header::MM_GROWN) + if(copy_bits & Tuple_header::MM_GROWN) { ndbout_c("abort grow"); - Var_page *pageP= (Var_page*)page.p; - Uint32 idx= regOperPtr.p->m_tuple_location.m_page_idx, sz; + Ptr<Page> vpage; + Uint32 idx= regOperPtr.p->m_tuple_location.m_page_idx; Uint32 mm_vars= regTabPtr.p->m_attributes[MM].m_no_of_varsize; Uint32 *var_part; - if(! 
(tuple_ptr->m_header_bits & Tuple_header::CHAINED_ROW)) - { - var_part= tuple_ptr->get_var_part_ptr(regTabPtr.p); - sz= Tuple_header::HeaderSize + - regTabPtr.p->m_offsets[MM].m_fix_header_size; - } - else - { - Ptr<Var_page> vpage; - Uint32 ref= * tuple_ptr->get_var_part_ptr(regTabPtr.p); - Local_key tmp; - tmp.assref(ref); - - sz= 0; + + ndbassert(tuple_ptr->m_header_bits & Tuple_header::CHAINED_ROW); + + Uint32 ref= * tuple_ptr->get_var_part_ptr(regTabPtr.p); + Local_key tmp; + tmp.assref(ref); + idx= tmp.m_page_idx; var_part= get_ptr(&vpage, *(Var_part_ref*)&ref); - pageP= vpage.p; - } + Var_page* pageP = (Var_page*)vpage.p; Uint32 len= pageP->get_entry_len(idx) & ~Var_page::CHAIN; - sz += ((((mm_vars + 1) << 1) + (((Uint16*)var_part)[mm_vars]) + 3)>> 2); + Uint32 sz = ((((mm_vars + 1) << 1) + (((Uint16*)var_part)[mm_vars]) + 3)>> 2); ndbassert(sz <= len); pageP->shrink_entry(idx, sz); - update_free_page_list(regFragPtr.p, pageP); + update_free_page_list(regFragPtr.p, vpage); } else if(bits & Tuple_header::MM_SHRINK) { ndbout_c("abort shrink"); } } + else if (regOperPtr.p->is_first_operation() && + regOperPtr.p->is_last_operation()) + { + /** + * Aborting last operation that performed ALLOC + */ + ndbout_c("clearing ALLOC"); + tuple_ptr->m_header_bits &= ~(Uint32)Tuple_header::ALLOC; + tuple_ptr->m_header_bits |= Tuple_header::FREE; + } } if(regOperPtr.p->is_first_operation() && regOperPtr.p->is_last_operation()) @@ -338,9 +341,15 @@ void Dbtup::tupkeyErrorLab(Signal* signal) c_lgman->free_log_space(fragPtr.p->m_logfile_group_id, regOperPtr->m_undo_buffer_space); } - - PagePtr tmp; - Uint32 *ptr= get_ptr(&tmp, &regOperPtr->m_tuple_location, tabPtr.p); + + Uint32 *ptr = 0; + if (!regOperPtr->m_tuple_location.isNull()) + { + PagePtr tmp; + ptr= get_ptr(&tmp, &regOperPtr->m_tuple_location, tabPtr.p); + } + + removeActiveOpList(regOperPtr, (Tuple_header*)ptr); initOpConnection(regOperPtr); send_TUPKEYREF(signal, regOperPtr); diff --git
a/storage/ndb/src/kernel/blocks/dbtup/DbtupCommit.cpp b/storage/ndb/src/kernel/blocks/dbtup/DbtupCommit.cpp index 7ad43a89d63..8ad6d8e2231 100644 --- a/storage/ndb/src/kernel/blocks/dbtup/DbtupCommit.cpp +++ b/storage/ndb/src/kernel/blocks/dbtup/DbtupCommit.cpp @@ -55,14 +55,7 @@ void Dbtup::execTUP_DEALLOCREQ(Signal* signal) if (regTabPtr.p->m_attributes[MM].m_no_of_varsize) { ljam(); - - if(ptr->m_header_bits & Tuple_header::CHAINED_ROW) - { - free_var_part(regFragPtr.p, regTabPtr.p, - *(Var_part_ref*)ptr->get_var_part_ptr(regTabPtr.p), - Var_page::CHAIN); - } - free_var_part(regFragPtr.p, regTabPtr.p, &tmp, (Var_page*)pagePtr.p, 0); + free_var_rec(regFragPtr.p, regTabPtr.p, &tmp, pagePtr); } else { free_fix_rec(regFragPtr.p, regTabPtr.p, &tmp, (Fix_page*)pagePtr.p); } @@ -156,15 +149,19 @@ Dbtup::dealloc_tuple(Signal* signal, Fragrecord* regFragPtr, Tablerec* regTabPtr) { + ptr->m_header_bits |= Tuple_header::FREE; if (ptr->m_header_bits & Tuple_header::DISK_PART) { Local_key disk; memcpy(&disk, ptr->get_disk_ref_ptr(regTabPtr), sizeof(disk)); - Ptr<GlobalPage> disk_page; - m_global_page_pool.getPtr(disk_page, - regOperPtr->m_commit_disk_callback_page); disk_page_free(signal, regTabPtr, regFragPtr, - &disk, *(PagePtr*)&disk_page, gci); + &disk, *(PagePtr*)&m_pgman.m_ptr, gci); + } + + if (regTabPtr->m_bits & Tablerec::TR_RowGCI) + { + jam(); + * ptr->get_mm_gci(regTabPtr) = gci; } } @@ -181,7 +178,7 @@ void Dbtup::commit_operation(Signal* signal, Uint32 gci, Tuple_header* tuple_ptr, - Page* page, + PagePtr pagePtr, Operationrec* regOperPtr, Fragrecord* regFragPtr, Tablerec* regTabPtr) @@ -197,76 +194,39 @@ Dbtup::commit_operation(Signal* signal, Uint32 copy_bits= copy->m_header_bits; - Uint32 fix_size= regTabPtr->m_offsets[MM].m_fix_header_size; + Uint32 fixsize= regTabPtr->m_offsets[MM].m_fix_header_size; Uint32 mm_vars= regTabPtr->m_attributes[MM].m_no_of_varsize; if(mm_vars == 0) { - memcpy(tuple_ptr, copy, 4*fix_size); - //ndbout_c("commit: memcpy %p %p %d", 
tuple_ptr, copy, 4*fix_size); - disk_ptr= (Tuple_header*)(((Uint32*)copy)+fix_size); + memcpy(tuple_ptr, copy, 4*fixsize); + disk_ptr= (Tuple_header*)(((Uint32*)copy)+fixsize); } - else if(bits & Tuple_header::CHAINED_ROW) + else { Uint32 *ref= tuple_ptr->get_var_part_ptr(regTabPtr); - memcpy(tuple_ptr, copy, 4*(Tuple_header::HeaderSize+fix_size)); - + memcpy(tuple_ptr, copy, 4*(Tuple_header::HeaderSize+fixsize)); + Local_key tmp; tmp.assref(*ref); - if(0) printf("%p %d %d (%d bytes) - ref: %x ", tuple_ptr, - regOperPtr->m_tuple_location.m_page_no, - regOperPtr->m_tuple_location.m_page_idx, - 4*(Tuple_header::HeaderSize+fix_size), - *ref); - Ptr<Var_page> vpagePtr; + + PagePtr vpagePtr; Uint32 *dst= get_ptr(&vpagePtr, *(Var_part_ref*)ref); + Var_page* vpagePtrP = (Var_page*)vpagePtr.p; Uint32 *src= copy->get_var_part_ptr(regTabPtr); Uint32 sz= ((mm_vars + 1) << 1) + (((Uint16*)src)[mm_vars]); - ndbassert(4*vpagePtr.p->get_entry_len(tmp.m_page_idx) >= sz); + ndbassert(4*vpagePtrP->get_entry_len(tmp.m_page_idx) >= sz); memcpy(dst, src, sz); - if(0) printf("ptr: %p %d ref: %x - chain commit", dst, sz, *ref); + copy_bits |= Tuple_header::CHAINED_ROW; - if(0) - { - for(Uint32 i = 0; i<((sz+3)>>2); i++) - printf(" %.8x", src[i]); - printf("\n"); - } - if(copy_bits & Tuple_header::MM_SHRINK) { - if(0) printf(" - shrink %d -> %d - ", - vpagePtr.p->get_entry_len(tmp.m_page_idx), (sz + 3) >> 2); - vpagePtr.p->shrink_entry(tmp.m_page_idx, (sz + 3) >> 2); - if(0)ndbout_c("%p->shrink_entry(%d, %d)", vpagePtr.p, tmp.m_page_idx, - (sz + 3) >> 2); - update_free_page_list(regFragPtr, vpagePtr.p); + vpagePtrP->shrink_entry(tmp.m_page_idx, (sz + 3) >> 2); + update_free_page_list(regFragPtr, vpagePtr); } - if(0) ndbout_c(""); + disk_ptr = (Tuple_header*) - (((Uint32*)copy)+Tuple_header::HeaderSize+fix_size+((sz + 3) >> 2)); + (((Uint32*)copy)+Tuple_header::HeaderSize+fixsize+((sz + 3) >> 2)); } - else - { - Uint32 *var_part= copy->get_var_part_ptr(regTabPtr); - Uint32 sz= 
Tuple_header::HeaderSize + fix_size + - ((((mm_vars + 1) << 1) + (((Uint16*)var_part)[mm_vars]) + 3)>> 2); - ndbassert(((Var_page*)page)-> - get_entry_len(regOperPtr->m_tuple_location.m_page_idx) >= sz); - memcpy(tuple_ptr, copy, 4*sz); - if(0) ndbout_c("%p %d %d (%d bytes)", tuple_ptr, - regOperPtr->m_tuple_location.m_page_no, - regOperPtr->m_tuple_location.m_page_idx, - 4*sz); - if(copy_bits & Tuple_header::MM_SHRINK) - { - ((Var_page*)page)->shrink_entry(regOperPtr->m_tuple_location.m_page_idx, - sz); - if(0)ndbout_c("%p->shrink_entry(%d, %d)", - page, regOperPtr->m_tuple_location.m_page_idx, sz); - update_free_page_list(regFragPtr, (Var_page*)page); - } - disk_ptr = (Tuple_header*)(((Uint32*)copy)+sz); - } if (regTabPtr->m_no_of_disk_attributes && (copy_bits & Tuple_header::DISK_INLINE)) @@ -276,13 +236,13 @@ Dbtup::commit_operation(Signal* signal, Uint32 logfile_group_id= regFragPtr->m_logfile_group_id; Uint32 lcpScan_ptr_i= regFragPtr->m_lcp_scan_op; - PagePtr pagePtr = *(PagePtr*)&m_pgman.m_ptr; - ndbassert(pagePtr.p->m_page_no == key.m_page_no); - ndbassert(pagePtr.p->m_file_no == key.m_file_no); + PagePtr diskPagePtr = *(PagePtr*)&m_pgman.m_ptr; + ndbassert(diskPagePtr.p->m_page_no == key.m_page_no); + ndbassert(diskPagePtr.p->m_file_no == key.m_file_no); Uint32 sz, *dst; if(copy_bits & Tuple_header::DISK_ALLOC) { - disk_page_alloc(signal, regTabPtr, regFragPtr, &key, pagePtr, gci); + disk_page_alloc(signal, regTabPtr, regFragPtr, &key, diskPagePtr, gci); if(lcpScan_ptr_i != RNIL) { @@ -301,17 +261,18 @@ Dbtup::commit_operation(Signal* signal, if(regTabPtr->m_attributes[DD].m_no_of_varsize == 0) { sz= regTabPtr->m_offsets[DD].m_fix_header_size; - dst= ((Fix_page*)pagePtr.p)->get_ptr(key.m_page_idx, sz); + dst= ((Fix_page*)diskPagePtr.p)->get_ptr(key.m_page_idx, sz); } else { - dst= ((Var_page*)pagePtr.p)->get_ptr(key.m_page_idx); - sz= ((Var_page*)pagePtr.p)->get_entry_len(key.m_page_idx); + dst= ((Var_page*)diskPagePtr.p)->get_ptr(key.m_page_idx); + sz= 
((Var_page*)diskPagePtr.p)->get_entry_len(key.m_page_idx); } if(! (copy_bits & Tuple_header::DISK_ALLOC)) { - disk_page_undo_update(pagePtr.p, &key, dst, sz, gci, logfile_group_id); + disk_page_undo_update(diskPagePtr.p, + &key, dst, sz, gci, logfile_group_id); } memcpy(dst, disk_ptr, 4*sz); @@ -320,10 +281,10 @@ Dbtup::commit_operation(Signal* signal, ndbassert(! (disk_ptr->m_header_bits & Tuple_header::FREE)); copy_bits |= Tuple_header::DISK_PART; } - + Uint32 clear= - Tuple_header::ALLOC | + Tuple_header::ALLOC | Tuple_header::FREE | Tuple_header::DISK_ALLOC | Tuple_header::DISK_INLINE | Tuple_header::MM_SHRINK | Tuple_header::MM_GROWN; copy_bits &= ~(Uint32)clear; @@ -331,7 +292,13 @@ Dbtup::commit_operation(Signal* signal, tuple_ptr->m_header_bits= copy_bits; tuple_ptr->m_operation_ptr_i= save; - if (regTabPtr->checksumIndicator) { + if (regTabPtr->m_bits & Tablerec::TR_RowGCI) + { + jam(); + * tuple_ptr->get_mm_gci(regTabPtr) = gci; + } + + if (regTabPtr->m_bits & Tablerec::TR_Checksum) { jam(); setChecksum(tuple_ptr, regTabPtr); } @@ -505,8 +472,13 @@ void Dbtup::execTUP_COMMITREQ(Signal* signal) req.m_callback.m_callbackFunction = safe_cast(&Dbtup::disk_page_commit_callback); + /* + * Consider commit to be correlated. Otherwise pk op + commit makes + * the page hot. XXX move to TUP which knows better. 
+ */ int flags= regOperPtr.p->op_struct.op_type | - Page_cache_client::COMMIT_REQ | Page_cache_client::STRICT_ORDER; + Page_cache_client::COMMIT_REQ | Page_cache_client::STRICT_ORDER | + Page_cache_client::CORR_REQ; int res= m_pgman.get_page(signal, req, flags); switch(res){ case 0: @@ -548,9 +520,10 @@ void Dbtup::execTUP_COMMITREQ(Signal* signal) if(!tuple_ptr) { - req_struct.m_tuple_ptr= tuple_ptr = (Tuple_header*) + tuple_ptr = (Tuple_header*) get_ptr(&page, ®OperPtr.p->m_tuple_location,regTabPtr.p); } + req_struct.m_tuple_ptr = tuple_ptr; if(get_tuple_state(regOperPtr.p) == TUPLE_PREPARED) { @@ -587,7 +560,7 @@ void Dbtup::execTUP_COMMITREQ(Signal* signal) if(regOperPtr.p->op_struct.op_type != ZDELETE) { - commit_operation(signal, gci, tuple_ptr, page.p, + commit_operation(signal, gci, tuple_ptr, page, regOperPtr.p, regFragPtr.p, regTabPtr.p); removeActiveOpList(regOperPtr.p, tuple_ptr); } diff --git a/storage/ndb/src/kernel/blocks/dbtup/DbtupDebug.cpp b/storage/ndb/src/kernel/blocks/dbtup/DbtupDebug.cpp index a5e076d216f..38842e7fa03 100644 --- a/storage/ndb/src/kernel/blocks/dbtup/DbtupDebug.cpp +++ b/storage/ndb/src/kernel/blocks/dbtup/DbtupDebug.cpp @@ -38,7 +38,7 @@ void Dbtup::execDEBUG_SIG(Signal* signal) PagePtr regPagePtr; ljamEntry(); regPagePtr.i = signal->theData[0]; - ptrCheckGuard(regPagePtr, cnoOfPage, cpage); + c_page_pool.getPtr(regPagePtr); }//Dbtup::execDEBUG_SIG() #ifdef TEST_MR @@ -72,7 +72,7 @@ Dbtup::reportMemoryUsage(Signal* signal, int incDec){ signal->theData[1] = incDec; signal->theData[2] = sizeof(Page); signal->theData[3] = cnoOfAllocatedPages; - signal->theData[4] = cnoOfPage; + signal->theData[4] = c_page_pool.getSize(); signal->theData[5] = DBTUP; sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 6, JBB); } @@ -168,7 +168,7 @@ Dbtup::execDUMP_STATE_ORD(Signal* signal) // Case Uint32 c = (rand() % 3); - const Uint32 free = cnoOfPage - cnoOfAllocatedPages; + const Uint32 free = c_page_pool.getSize() - cnoOfAllocatedPages; Uint32 alloc 
= 0; if(free <= 1){ @@ -213,7 +213,7 @@ Dbtup::execDUMP_STATE_ORD(Signal* signal) for(Uint32 i = 0; i<chunk.pageCount; i++){ PagePtr pagePtr; pagePtr.i = chunk.pageId + i; - ptrCheckGuard(pagePtr, cnoOfPage, cpage); + c_page_pool.getPtr(pagePtr); pagePtr.p->page_state = ~ZFREE_COMMON; } @@ -281,8 +281,7 @@ void Dbtup::printoutTuplePage(Uint32 fragid, Uint32 pageid, Uint32 printLimit) FragrecordPtr tmpFragP; TablerecPtr tmpTableP; - tmpPageP.i = pageid; - ptrCheckGuard(tmpPageP, cnoOfPage, cpage); + c_page_pool.getPtr(tmpPageP, pageid); tmpFragP.i = fragid; ptrCheckGuard(tmpFragP, cnoOfFragrec, fragrecord); @@ -334,7 +333,7 @@ operator<<(NdbOut& out, const Dbtup::Th& th) out << "[Th " << hex << &th; out << " [op " << hex << th.data[i++] << "]"; out << " [version " << hex << (Uint16)th.data[i++] << "]"; - if (tab.checksumIndicator) + if (tab.m_bits & Dbtup::Tablerec::TR_Checksum) out << " [checksum " << hex << th.data[i++] << "]"; out << " [nullbits"; for (unsigned j = 0; j < tab.m_offsets[Dbtup::MM].m_null_words; j++) @@ -381,7 +380,7 @@ NdbOut& operator<<(NdbOut& out, const Dbtup::Tablerec& tab) { out << "[ total_rec_size: " << tab.total_rec_size - << " checksum: " << tab.checksumIndicator + << " checksum: " << !!(tab.m_bits & Dbtup::Tablerec::TR_Checksum) << " attr: " << tab.m_no_of_attributes << " disk: " << tab.m_no_of_disk_attributes << " mm: " << tab.m_offsets[Dbtup::MM] diff --git a/storage/ndb/src/kernel/blocks/dbtup/DbtupDiskAlloc.cpp b/storage/ndb/src/kernel/blocks/dbtup/DbtupDiskAlloc.cpp index ad7acbb0829..5dfe08fac28 100644 --- a/storage/ndb/src/kernel/blocks/dbtup/DbtupDiskAlloc.cpp +++ b/storage/ndb/src/kernel/blocks/dbtup/DbtupDiskAlloc.cpp @@ -20,7 +20,8 @@ Dbtup::Disk_alloc_info::Disk_alloc_info(const Tablerec* tabPtrP, Uint32 extent_size) { - m_curr_extent_info_ptr_i= RNIL; + m_extent_size = extent_size; + m_curr_extent_info_ptr_i = RNIL; if (tabPtrP->m_no_of_disk_attributes == 0) return; @@ -278,6 +279,7 @@ Dbtup::disk_page_prealloc(Signal* 
signal, int pages= err; ndbout << "allocated " << pages << " pages: " << ext.p->m_key << endl; + ext.p->m_first_page_no = ext.p->m_key.m_page_no; bzero(ext.p->m_free_page_count, sizeof(ext.p->m_free_page_count)); ext.p->m_free_space= alloc.m_page_free_bits_map[0] * pages; ext.p->m_free_page_count[0]= pages; // All pages are "free"-est @@ -528,8 +530,7 @@ Dbtup::disk_page_prealloc_initial_callback(Signal*signal, if (tabPtr.p->m_attributes[DD].m_no_of_varsize == 0) { - convertThPage(tabPtr.p->m_offsets[DD].m_fix_header_size, - (Fix_page*)gpage.p); + convertThPage((Fix_page*)gpage.p, tabPtr.p, DD); } else { @@ -1060,6 +1061,7 @@ Dbtup::disk_restart_alloc_extent(Uint32 tableId, Uint32 fragId, ext.p->m_key = *key; ndbout << "allocated " << pages << " pages: " << ext.p->m_key << endl; + ext.p->m_first_page_no = ext.p->m_key.m_page_no; bzero(ext.p->m_free_page_count, sizeof(ext.p->m_free_page_count)); ext.p->m_free_space= alloc.m_page_free_bits_map[0] * pages; ext.p->m_free_page_count[0]= pages; // All pages are "free"-est diff --git a/storage/ndb/src/kernel/blocks/dbtup/DbtupExecQuery.cpp b/storage/ndb/src/kernel/blocks/dbtup/DbtupExecQuery.cpp index 62a8d8348f6..4ba38fd1e91 100644 --- a/storage/ndb/src/kernel/blocks/dbtup/DbtupExecQuery.cpp +++ b/storage/ndb/src/kernel/blocks/dbtup/DbtupExecQuery.cpp @@ -33,7 +33,7 @@ /* ----------------------------------------------------------------- */ /* ----------- INIT_STORED_OPERATIONREC -------------- */ /* ----------------------------------------------------------------- */ -int Dbtup::initStoredOperationrec(Operationrec* const regOperPtr, +int Dbtup::initStoredOperationrec(Operationrec* regOperPtr, KeyReqStruct* req_struct, Uint32 storedId) { @@ -54,7 +54,7 @@ int Dbtup::initStoredOperationrec(Operationrec* const regOperPtr, return terrorCode; } -void Dbtup::copyAttrinfo(Operationrec * const regOperPtr, +void Dbtup::copyAttrinfo(Operationrec * regOperPtr, Uint32* inBuffer) { AttrbufrecPtr copyAttrBufPtr; @@ -108,7 +108,7 @@ 
void Dbtup::copyAttrinfo(Operationrec * const regOperPtr, void Dbtup::handleATTRINFOforTUPKEYREQ(Signal* signal, const Uint32 *data, Uint32 len, - Operationrec * const regOperPtr) + Operationrec * regOperPtr) { while(len) { @@ -227,7 +227,7 @@ mem_error: void Dbtup::setChecksum(Tuple_header* tuple_ptr, - Tablerec* const regTabPtr) + Tablerec* regTabPtr) { tuple_ptr->m_checksum= 0; tuple_ptr->m_checksum= calculateChecksum(tuple_ptr, regTabPtr); @@ -235,7 +235,7 @@ Dbtup::setChecksum(Tuple_header* tuple_ptr, Uint32 Dbtup::calculateChecksum(Tuple_header* tuple_ptr, - Tablerec* const regTabPtr) + Tablerec* regTabPtr) { Uint32 checksum; Uint32 i, rec_size, *tuple_header; @@ -342,9 +342,9 @@ Dbtup::insertActiveOpList(OperationrecPtr regOperPtr, bool Dbtup::setup_read(KeyReqStruct *req_struct, - Operationrec* const regOperPtr, - Fragrecord* const regFragPtr, - Tablerec* const regTabPtr, + Operationrec* regOperPtr, + Fragrecord* regFragPtr, + Tablerec* regTabPtr, bool disk) { OperationrecPtr currOpPtr; @@ -433,167 +433,6 @@ Dbtup::setup_read(KeyReqStruct *req_struct, return false; } - bool - Dbtup::getPageThroughSavePoint(Operationrec* regOperPtr, - Operationrec* leaderOpPtr) - { - bool found= false; - OperationrecPtr loopOpPtr; - loopOpPtr.p= leaderOpPtr; - int res= 0; - while(true) { - ndbout_c("%d regOperPtr->savepointId: %d loopOpPtr.p->savepointId: %d", - res++, regOperPtr->savepointId, loopOpPtr.p->savepointId); - if (regOperPtr->savepointId > loopOpPtr.p->savepointId) { - jam(); - found= true; - break; - } - if (loopOpPtr.p->nextActiveOp == RNIL) { - break; - } - loopOpPtr.i= loopOpPtr.p->nextActiveOp; - c_operation_pool.getPtr(loopOpPtr); - jam(); - } - if (!found) { - return getPageLastCommitted(regOperPtr, loopOpPtr.p); - } else { - if (loopOpPtr.p->op_struct.op_type == ZDELETE) { - jam(); - terrorCode= ZTUPLE_DELETED_ERROR; - return false; - } - if (get_tuple_state(loopOpPtr.p) == TUPLE_ALREADY_ABORTED) { - /* - Requested tuple version has already been aborted - 
*/ - jam(); - terrorCode= ZMUST_BE_ABORTED_ERROR; - return false; - } - bool use_copy; - if (loopOpPtr.p->prevActiveOp == RNIL) { - jam(); - /* - Use original tuple since we are reading from the last written tuple. - We are the - */ - use_copy= false; - } else { - /* - Go forward in time to find a copy of the tuple which this operation - produced - */ - loopOpPtr.i= loopOpPtr.p->prevActiveOp; - c_operation_pool.getPtr(loopOpPtr); - if (loopOpPtr.p->op_struct.op_type == ZDELETE) { - /* - This operation was a Delete and thus have no copy tuple attached to - it. We will move forward to the next that either doesn't exist in - which case we will return the original tuple of any operation and - otherwise it must be an insert which contains a copy record. - */ - if (loopOpPtr.p->prevActiveOp == RNIL) { - jam(); - use_copy= false; - } else { - jam(); - loopOpPtr.i= loopOpPtr.p->prevActiveOp; - c_operation_pool.getPtr(loopOpPtr); - ndbrequire(loopOpPtr.p->op_struct.op_type == ZINSERT); - use_copy= true; - } - } else if (loopOpPtr.p->op_struct.op_type == ZUPDATE) { - jam(); - /* - This operation which was the next in time have a copy which was the - result of the previous operation which we want to use. Thus use - the copy tuple of this operation. - */ - use_copy= true; - } else { - /* - This operation was an insert that happened after an insert or update. - This is not a possible case. - */ - ndbrequire(false); - return false; - } - } - if (use_copy) { - ndbrequire(false); - regOperPtr->m_tuple_location= loopOpPtr.p->m_copy_tuple_location; - } else { - regOperPtr->m_tuple_location= loopOpPtr.p->m_tuple_location; - } - return true; - } - } - - bool - Dbtup::getPageLastCommitted(Operationrec* const regOperPtr, - Operationrec* const leaderOpPtr) - { - //---------------------------------------------------------------------- - // Dirty reads wants to read the latest committed tuple. The latest - // tuple value could be not existing or else we have to find the copy - // tuple. 
Start by finding the end of the list to find the first operation - // on the record in the ongoing transaction. - //---------------------------------------------------------------------- - jam(); - OperationrecPtr loopOpPtr; - loopOpPtr.p= leaderOpPtr; - while (loopOpPtr.p->nextActiveOp != RNIL) { - jam(); - loopOpPtr.i= loopOpPtr.p->nextActiveOp; - c_operation_pool.getPtr(loopOpPtr); - } - if (loopOpPtr.p->op_struct.op_type == ZINSERT) { - jam(); - //---------------------------------------------------------------------- - // With an insert in the start of the list we know that the tuple did not - // exist before this transaction was started. We don't care if the current - // transaction is in the commit phase since the commit is not really - // completed until the operation is gone from TUP. - //---------------------------------------------------------------------- - terrorCode= ZTUPLE_DELETED_ERROR; - return false; - } else { - //---------------------------------------------------------------------- - // A successful update and delete as first in the queue means that a tuple - // exist in the committed world. We need to find it. - //---------------------------------------------------------------------- - if (loopOpPtr.p->op_struct.op_type == ZUPDATE) { - jam(); - //---------------------------------------------------------------------- - // The first operation was a delete we set our tuple reference to the - // copy tuple of this operation. - //---------------------------------------------------------------------- - ndbrequire(false); - regOperPtr->m_tuple_location= loopOpPtr.p->m_copy_tuple_location; - } else if ((loopOpPtr.p->op_struct.op_type == ZDELETE) && - (loopOpPtr.p->prevActiveOp == RNIL)) { - jam(); - //---------------------------------------------------------------------- - // There was only a delete. The original tuple still is ok. 
- //---------------------------------------------------------------------- - } else { - jam(); - //---------------------------------------------------------------------- - // There was another operation after the delete, this must be an insert - // and we have found our copy tuple there. - //---------------------------------------------------------------------- - loopOpPtr.i= loopOpPtr.p->prevActiveOp; - c_operation_pool.getPtr(loopOpPtr); - ndbrequire(loopOpPtr.p->op_struct.op_type == ZINSERT); - ndbrequire(false); - regOperPtr->m_tuple_location = loopOpPtr.p->m_copy_tuple_location; - } - } - return true; - } - int Dbtup::load_diskpage(Signal* signal, Uint32 opRec, Uint32 fragPtrI, @@ -749,7 +588,7 @@ Dbtup::disk_page_load_scan_callback(Signal* signal, void Dbtup::execTUPKEYREQ(Signal* signal) { - TupKeyReq * const tupKeyReq= (TupKeyReq *)signal->getDataPtr(); + TupKeyReq * tupKeyReq= (TupKeyReq *)signal->getDataPtr(); KeyReqStruct req_struct; Uint32 sig1, sig2, sig3, sig4; @@ -825,7 +664,8 @@ void Dbtup::execTUPKEYREQ(Signal* signal) req_struct.TC_index= sig2; req_struct.TC_ref= sig3; req_struct.frag_page_id= sig4; - + req_struct.m_use_rowid = (TrequestInfo >> 11) & 1; + sig1= tupKeyReq->attrBufLen; sig2= tupKeyReq->applRef; sig3= tupKeyReq->transId1; @@ -840,6 +680,12 @@ void Dbtup::execTUPKEYREQ(Signal* signal) req_struct.trans_id2= sig4; req_struct.m_disk_page_ptr.i= disk_page; + sig1 = tupKeyReq->m_row_id_page_no; + sig2 = tupKeyReq->m_row_id_page_idx; + + req_struct.m_row_id.m_page_no = sig1; + req_struct.m_row_id.m_page_idx = sig2; + Uint32 Roptype = regOperPtr->op_struct.op_type; if (Rstoredid != ZNIL) { @@ -992,16 +838,16 @@ void Dbtup::execTUPKEYREQ(Signal* signal) void Dbtup::setup_fixed_part(KeyReqStruct* req_struct, - Operationrec* const regOperPtr, - Tablerec* const regTabPtr) + Operationrec* regOperPtr, + Tablerec* regTabPtr) { PagePtr page_ptr; Uint32* ptr= get_ptr(&page_ptr, ®OperPtr->m_tuple_location, regTabPtr); - req_struct->m_page_ptr_p= 
page_ptr.p; - req_struct->m_tuple_ptr= (Tuple_header*)ptr; + req_struct->m_page_ptr = page_ptr; + req_struct->m_tuple_ptr = (Tuple_header*)ptr; + + ndbassert(regOperPtr->op_struct.op_type == ZINSERT || (! (req_struct->m_tuple_ptr->m_header_bits & Tuple_header::FREE))); - ndbassert(! (req_struct->m_tuple_ptr->m_header_bits & Tuple_header::FREE)); - req_struct->check_offset[MM]= regTabPtr->get_check_offset(MM); req_struct->check_offset[DD]= regTabPtr->get_check_offset(DD); @@ -1017,10 +863,11 @@ Dbtup::setup_fixed_part(KeyReqStruct* req_struct, /* ---------------------------------------------------------------- */ void Dbtup::sendTUPKEYCONF(Signal* signal, KeyReqStruct *req_struct, - Operationrec * const regOperPtr) + Operationrec * regOperPtr) { - TupKeyConf * const tupKeyConf= (TupKeyConf *)signal->getDataPtrSend(); + TupKeyConf * tupKeyConf= (TupKeyConf *)signal->getDataPtrSend(); + Uint32 Rcreate_rowid = req_struct->m_use_rowid; Uint32 RuserPointer= regOperPtr->userpointer; Uint32 RnoFiredTriggers= req_struct->no_fired_triggers; Uint32 log_size= req_struct->log_size; @@ -1034,6 +881,7 @@ Dbtup::setup_fixed_part(KeyReqStruct* req_struct, tupKeyConf->writeLength= log_size; tupKeyConf->noFiredTriggers= RnoFiredTriggers; tupKeyConf->lastRow= last_row; + tupKeyConf->rowid = Rcreate_rowid; EXECUTE_DIRECT(DBLQH, GSN_TUPKEYCONF, signal, TupKeyConf::SignalLength); @@ -1047,14 +895,14 @@ Dbtup::setup_fixed_part(KeyReqStruct* req_struct, /* ----------------------------- READ ---------------------------- */ /* ---------------------------------------------------------------- */ int Dbtup::handleReadReq(Signal* signal, - Operationrec* const regOperPtr, - Tablerec* const regTabPtr, + Operationrec* regOperPtr, + Tablerec* regTabPtr, KeyReqStruct* req_struct) { Uint32 *dst; Uint32 dstLen, start_index; const BlockReference sendBref= req_struct->rec_blockref; - if (regTabPtr->checksumIndicator && + if ((regTabPtr->m_bits & Tablerec::TR_Checksum) && 
(calculateChecksum(req_struct->m_tuple_ptr, regTabPtr) != 0)) { jam(); ndbout_c("here2"); @@ -1109,9 +957,9 @@ int Dbtup::handleReadReq(Signal* signal, /* ---------------------------- UPDATE ---------------------------- */ /* ---------------------------------------------------------------- */ int Dbtup::handleUpdateReq(Signal* signal, - Operationrec* const operPtrP, - Fragrecord* const regFragPtr, - Tablerec* const regTabPtr, + Operationrec* operPtrP, + Fragrecord* regFragPtr, + Tablerec* regTabPtr, KeyReqStruct* req_struct, bool disk) { @@ -1141,7 +989,7 @@ int Dbtup::handleUpdateReq(Signal* signal, * Check consistency before update/delete */ req_struct->m_tuple_ptr= org; - if (regTabPtr->checksumIndicator && + if ((regTabPtr->m_bits & Tablerec::TR_Checksum) && (calculateChecksum(req_struct->m_tuple_ptr, regTabPtr) != 0)) { terrorCode= ZTUPLE_CORRUPTED_ERROR; @@ -1168,7 +1016,7 @@ int Dbtup::handleUpdateReq(Signal* signal, terrorCode= c_lgman->alloc_log_space(regFragPtr->m_logfile_group_id, sz); - if(terrorCode) + if(unlikely(terrorCode)) { operPtrP->m_undo_buffer_space= 0; goto error; @@ -1212,7 +1060,7 @@ int Dbtup::handleUpdateReq(Signal* signal, } req_struct->m_tuple_ptr->set_tuple_version(tup_version); - if (regTabPtr->checksumIndicator) { + if (regTabPtr->m_bits & Tablerec::TR_Checksum) { jam(); setChecksum(req_struct->m_tuple_ptr, regTabPtr); } @@ -1344,7 +1192,7 @@ Dbtup::fix_disk_insert_no_mem_insert(KeyReqStruct *req_struct, int Dbtup::handleInsertReq(Signal* signal, Ptr<Operationrec> regOperPtr, Ptr<Fragrecord> fragPtr, - Tablerec* const regTabPtr, + Tablerec* regTabPtr, KeyReqStruct *req_struct) { Uint32 tup_version = 1; @@ -1352,30 +1200,34 @@ int Dbtup::handleInsertReq(Signal* signal, Uint32 *dst, *ptr= 0; Tuple_header *base= req_struct->m_tuple_ptr, *org= base; Tuple_header *tuple_ptr; - bool disk, mem_insert, disk_insert; - Uint32 frag_page_id, real_page_id; - - if ((dst= - c_undo_buffer.alloc_copy_tuple(®OperPtr.p->m_copy_tuple_location, - 
regTabPtr->total_rec_size)) == 0) - { - goto mem_error; - } - tuple_ptr= req_struct->m_tuple_ptr= (Tuple_header*)dst; - - if(0) - ndbout << "dst: " << hex << UintPtr(dst) << " - " - << regOperPtr.p->m_copy_tuple_location << endl; - - disk = regTabPtr->m_no_of_disk_attributes > 0; - mem_insert = get_tuple_state(regOperPtr.p) == TUPLE_INITIAL_INSERT; - disk_insert = regOperPtr.p->is_first_operation() && disk; + + bool disk = regTabPtr->m_no_of_disk_attributes > 0; + bool mem_insert = get_tuple_state(regOperPtr.p) == TUPLE_INITIAL_INSERT; + bool disk_insert = regOperPtr.p->is_first_operation() && disk; + bool varsize = regTabPtr->m_attributes[MM].m_no_of_varsize; + bool rowid = req_struct->m_use_rowid; + Uint32 real_page_id = regOperPtr.p->m_tuple_location.m_page_no; + Uint32 frag_page_id = req_struct->frag_page_id; union { Uint32 sizes[4]; Uint64 cmp[2]; }; + if (ERROR_INSERTED(4014)) + { + dst = 0; + goto undo_buffer_error; + } + + dst= c_undo_buffer.alloc_copy_tuple(®OperPtr.p->m_copy_tuple_location, + regTabPtr->total_rec_size); + if (unlikely(dst == 0)) + { + goto undo_buffer_error; + } + tuple_ptr= req_struct->m_tuple_ptr= (Tuple_header*)dst; + if(mem_insert) { jam(); @@ -1408,22 +1260,42 @@ int Dbtup::handleInsertReq(Signal* signal, sizes[DD] = sizes[DD+2] = regTabPtr->m_offsets[DD].m_fix_header_size; fix_disk_insert_no_mem_insert(req_struct, regOperPtr.p, regTabPtr); } - if((res= c_lgman->alloc_log_space(regFragPtr->m_logfile_group_id, - regOperPtr.p->m_undo_buffer_space))) + + if (ERROR_INSERTED(4015)) + { + terrorCode = 1501; + goto log_space_error; + } + + res= c_lgman->alloc_log_space(regFragPtr->m_logfile_group_id, + regOperPtr.p->m_undo_buffer_space); + if(unlikely(res)) { terrorCode= res; - regOperPtr.p->m_undo_buffer_space= 0; goto log_space_error; } } regOperPtr.p->tupVersion= tup_version & ZTUP_VERSION_MASK; tuple_ptr->set_tuple_version(tup_version); - if(updateAttributes(req_struct, &cinBuffer[0], - req_struct->attrinfo_len) == -1) - return -1; - if 
(checkNullAttributes(req_struct, regTabPtr) == false) + if (ERROR_INSERTED(4016)) + { + terrorCode = ZAI_INCONSISTENCY_ERROR; + goto update_error; + } + + if(unlikely(updateAttributes(req_struct, &cinBuffer[0], + req_struct->attrinfo_len) == -1)) + { + goto update_error; + } + + if (ERROR_INSERTED(4017)) + { + goto null_check_error; + } + if (unlikely(checkNullAttributes(req_struct, regTabPtr) == false)) { goto null_check_error; } @@ -1436,32 +1308,69 @@ int Dbtup::handleInsertReq(Signal* signal, /** * Alloc memory */ - frag_page_id = req_struct->frag_page_id; - real_page_id = regOperPtr.p->m_tuple_location.m_page_no; if(mem_insert) { - if (!regTabPtr->m_attributes[MM].m_no_of_varsize) + if (!rowid) { - jam(); - if ((ptr= alloc_fix_rec(regFragPtr, - regTabPtr, - ®OperPtr.p->m_tuple_location, - &frag_page_id)) == 0) + if (ERROR_INSERTED(4018)) { goto mem_error; } - } - else - { - jam(); - regOperPtr.p->m_tuple_location.m_file_no= sizes[2+MM]; - if ((ptr= alloc_var_rec(regFragPtr, regTabPtr, - sizes[2+MM], - ®OperPtr.p->m_tuple_location, - &frag_page_id, 0)) == 0) + + if (!varsize) + { + jam(); + ptr= alloc_fix_rec(regFragPtr, + regTabPtr, + ®OperPtr.p->m_tuple_location, + &frag_page_id); + } + else + { + jam(); + regOperPtr.p->m_tuple_location.m_file_no= sizes[2+MM]; + ptr= alloc_var_rec(regFragPtr, regTabPtr, + sizes[2+MM], + ®OperPtr.p->m_tuple_location, + &frag_page_id); + } + if (unlikely(ptr == 0)) + { goto mem_error; + } + req_struct->m_use_rowid = true; + } + else + { + regOperPtr.p->m_tuple_location = req_struct->m_row_id; + if (ERROR_INSERTED(4019)) + { + terrorCode = ZROWID_ALLOCATED; + goto alloc_rowid_error; + } + + if (!varsize) + { + jam(); + ptr= alloc_fix_rowid(regFragPtr, + regTabPtr, + ®OperPtr.p->m_tuple_location, + &frag_page_id); + } + else + { + jam(); + regOperPtr.p->m_tuple_location.m_file_no= sizes[2+MM]; + ptr= alloc_var_rowid(regFragPtr, regTabPtr, + sizes[2+MM], + ®OperPtr.p->m_tuple_location, + &frag_page_id); + } + if (unlikely(ptr == 
0)) + { + goto alloc_rowid_error; + } } - real_page_id = regOperPtr.p->m_tuple_location.m_page_no; regOperPtr.p->m_tuple_location.m_page_no= frag_page_id; c_lqh->accminupdate(signal, @@ -1469,21 +1378,43 @@ int Dbtup::handleInsertReq(Signal* signal, ®OperPtr.p->m_tuple_location); ((Tuple_header*)ptr)->m_operation_ptr_i= regOperPtr.i; - ((Tuple_header*)ptr)->m_header_bits= Tuple_header::ALLOC; + ((Tuple_header*)ptr)->m_header_bits= Tuple_header::ALLOC | + (varsize ? Tuple_header::CHAINED_ROW : 0); regOperPtr.p->m_tuple_location.m_page_no = real_page_id; } - else + else if(!rowid || !regOperPtr.p->is_first_operation()) { int ret; + if (ERROR_INSERTED(4020)) + { + goto size_change_error; + } + if (regTabPtr->need_shrink() && cmp[0] != cmp[1] && - (ret = handle_size_change_after_update(req_struct, - base, - regOperPtr.p, - regFragPtr, - regTabPtr, - sizes))) + unlikely(ret = handle_size_change_after_update(req_struct, + base, + regOperPtr.p, + regFragPtr, + regTabPtr, + sizes))) { - return ret; + goto size_change_error; + } + req_struct->m_use_rowid = false; + base->m_header_bits &= ~(Uint32)Tuple_header::FREE; + } + else + { + if ((req_struct->m_row_id.m_page_no == frag_page_id && + req_struct->m_row_id.m_page_idx == regOperPtr.p->m_tuple_location.m_page_idx)) + { + ndbout_c("no mem insert but rowid (same)"); + base->m_header_bits &= ~(Uint32)Tuple_header::FREE; + } + else + { + // no mem insert, but rowid + ndbrequire(false); } } @@ -1493,8 +1424,17 @@ int Dbtup::handleInsertReq(Signal* signal, Uint32 size= regTabPtr->m_attributes[DD].m_no_of_varsize == 0 ? 
1 : sizes[2+DD]; + if (ERROR_INSERTED(4021)) + { + terrorCode = 1601; + goto disk_prealloc_error; + } + int ret= disk_page_prealloc(signal, fragPtr, &tmp, size); - ndbassert(ret >= 0); + if (unlikely(ret < 0)) + { + goto disk_prealloc_error; + } regOperPtr.p->op_struct.m_disk_preallocated= 1; tmp.m_page_idx= size; @@ -1511,28 +1451,52 @@ int Dbtup::handleInsertReq(Signal* signal, disk_ptr->m_base_record_ref= ref.ref(); } - if (regTabPtr->checksumIndicator) + if (regTabPtr->m_bits & Tablerec::TR_Checksum) { jam(); setChecksum(req_struct->m_tuple_ptr, regTabPtr); } return 0; -mem_error: +size_change_error: + jam(); + terrorCode = ZMEM_NOMEM_ERROR; + goto disk_prealloc_error; + +undo_buffer_error: + jam(); terrorCode= ZMEM_NOMEM_ERROR; - goto error; + regOperPtr.p->m_undo_buffer_space = 0; + if (mem_insert) + regOperPtr.p->m_tuple_location.setNull(); + regOperPtr.p->m_copy_tuple_location.setNull(); + tupkeyErrorLab(signal); + return -1; null_check_error: + jam(); terrorCode= ZNO_ILLEGAL_NULL_ATTR; - goto error; + goto update_error; -error: - tupkeyErrorLab(signal); - return -1; +mem_error: + jam(); + terrorCode= ZMEM_NOMEM_ERROR; + goto update_error; log_space_error: - regOperPtr.p->op_struct.in_active_list = false; - early_tupkey_error(signal); + jam(); + regOperPtr.p->m_undo_buffer_space = 0; +alloc_rowid_error: + jam(); +update_error: + jam(); + if (mem_insert) + { + regOperPtr.p->op_struct.in_active_list = false; + regOperPtr.p->m_tuple_location.setNull(); + } +disk_prealloc_error: + tupkeyErrorLab(signal); return -1; } @@ -1540,9 +1504,9 @@ log_space_error: /* ---------------------------- DELETE ---------------------------- */ /* ---------------------------------------------------------------- */ int Dbtup::handleDeleteReq(Signal* signal, - Operationrec* const regOperPtr, - Fragrecord* const regFragPtr, - Tablerec* const regTabPtr, + Operationrec* regOperPtr, + Fragrecord* regFragPtr, + Tablerec* regTabPtr, KeyReqStruct *req_struct) { // delete must set but not 
increment tupVersion @@ -1596,8 +1560,8 @@ error: } bool -Dbtup::checkNullAttributes(KeyReqStruct * const req_struct, - Tablerec* const regTabPtr) +Dbtup::checkNullAttributes(KeyReqStruct * req_struct, + Tablerec* regTabPtr) { // Implement checking of updating all not null attributes in an insert here. Bitmask<MAXNROFATTRIBUTESINWORDS> attributeMask; @@ -2504,7 +2468,7 @@ int Dbtup::interpreterNextLab(Signal* signal, Uint32* expand_var_part(Dbtup::KeyReqStruct::Var_data *dst, const Uint32* src, - const Uint32 * const tabDesc, + const Uint32 * tabDesc, const Uint16* order) { char* dst_ptr= dst->m_data_ptr; @@ -2553,7 +2517,7 @@ Dbtup::expand_tuple(KeyReqStruct* req_struct, Uint32 *dst_ptr= ptr->get_var_part_ptr(tabPtrP); const Uint32 *disk_ref= src->get_disk_ref_ptr(tabPtrP); const Uint32 *src_ptr= src->get_var_part_ptr(tabPtrP); - const Uint32 * const desc= (Uint32*)req_struct->attr_descr; + const Uint32 * desc= (Uint32*)req_struct->attr_descr; const Uint16 *order = (Uint16*)(&tableDescriptor[order_desc]); order += tabPtrP->m_attributes[MM].m_no_of_fixsize; @@ -2565,17 +2529,17 @@ Dbtup::expand_tuple(KeyReqStruct* req_struct, KeyReqStruct::Var_data* dst= &req_struct->m_var_data[MM]; if(bits & Tuple_header::CHAINED_ROW) { - Ptr<Var_page> var_page; + Ptr<Page> var_page; src_data= get_ptr(&var_page, * (Var_part_ref*)src_ptr); step= 4; sizes[MM]= (2 + (mm_vars << 1) + ((Uint16*)src_data)[mm_vars] + 3) >> 2; - req_struct->m_varpart_page_ptr_p= var_page.p; + req_struct->m_varpart_page_ptr = var_page; } else { step= (2 + (mm_vars << 1) + ((Uint16*)src_ptr)[mm_vars]); sizes[MM]= (step + 3) >> 2; - req_struct->m_varpart_page_ptr_p= (Var_page*)req_struct->m_page_ptr_p; + req_struct->m_varpart_page_ptr = req_struct->m_page_ptr; } dst->m_data_ptr= (char*)(((Uint16*)dst_ptr)+mm_vars+1); dst->m_offset_array_ptr= req_struct->var_pos_array; @@ -2617,7 +2581,7 @@ Dbtup::expand_tuple(KeyReqStruct* req_struct, Local_key key; memcpy(&key, disk_ref, sizeof(key)); key.m_page_no= 
req_struct->m_disk_page_ptr.i; - src_ptr= get_ptr(&req_struct->m_disk_page_ptr, &key, tabPtrP, DD); + src_ptr= get_dd_ptr(&req_struct->m_disk_page_ptr, &key, tabPtrP); } bits |= Tuple_header::DISK_INLINE; @@ -2649,7 +2613,7 @@ Dbtup::expand_tuple(KeyReqStruct* req_struct, void Dbtup::prepare_read(KeyReqStruct* req_struct, - Tablerec* const tabPtrP, bool disk) + Tablerec* tabPtrP, bool disk) { Tuple_header* ptr= req_struct->m_tuple_ptr; @@ -2700,7 +2664,7 @@ Dbtup::prepare_read(KeyReqStruct* req_struct, Local_key key; memcpy(&key, disk_ref, sizeof(key)); key.m_page_no= req_struct->m_disk_page_ptr.i; - src_ptr= get_ptr(&req_struct->m_disk_page_ptr, &key, tabPtrP, DD); + src_ptr= get_dd_ptr(&req_struct->m_disk_page_ptr, &key, tabPtrP); } // Fix diskpart req_struct->m_disk_ptr= (Tuple_header*)src_ptr; @@ -2799,22 +2763,23 @@ Dbtup::validate_page(Tablerec* regTabPtr, Var_page* p) for(Uint32 P= 0; P<fragPtr.p->noOfPages; P++) { Uint32 real= getRealpid(fragPtr.p, P); - Var_page* page= (Var_page*)(cpage + real); + Var_page* page= (Var_page*)c_page_pool.getPtr(real); for(Uint32 i=1; i<page->high_index; i++) { - Uint32 len= page->get_entry_len(i); - if(len && !(len & Var_page::CHAIN)) + Uint32 idx= page->get_index_word(i); + Uint32 len = (idx & Var_page::LEN_MASK) >> Var_page::LEN_SHIFT; + if(!(idx & Var_page::FREE) && !(idx & Var_page::CHAIN)) { Tuple_header *ptr= (Tuple_header*)page->get_ptr(i); Uint32 *part= ptr->get_var_part_ptr(regTabPtr); if(ptr->m_header_bits & Tuple_header::CHAINED_ROW) { - assert(len == fix_sz + 1); + ndbassert(len == fix_sz + 1); Local_key tmp; tmp.assref(*part); - Ptr<Var_page> tmpPage; + Ptr<Page> tmpPage; part= get_ptr(&tmpPage, *(Var_part_ref*)part); - len= tmpPage.p->get_entry_len(tmp.m_page_idx); + len= ((Var_page*)tmpPage.p)->get_entry_len(tmp.m_page_idx); Uint32 sz= ((mm_vars + 1) << 1) + (((Uint16*)part)[mm_vars]); ndbassert(len >= ((sz + 3) >> 2)); } @@ -2828,14 +2793,14 @@ Dbtup::validate_page(Tablerec* regTabPtr, Var_page* p) 
c_operation_pool.getPtr(ptr->m_operation_ptr_i); } } - else if(len) + else if(!(idx & Var_page::FREE)) { /** * Chain */ Uint32 *part= page->get_ptr(i); Uint32 sz= ((mm_vars + 1) << 1) + (((Uint16*)part)[mm_vars]); - ndbassert((len & ~Var_page::CHAIN) >= ((sz + 3) >> 2)); + ndbassert(len >= ((sz + 3) >> 2)); } else { @@ -2884,26 +2849,27 @@ Dbtup::handle_size_change_after_update(KeyReqStruct* req_struct, else { if(0) printf("grow - "); - Var_page* pageP= req_struct->m_varpart_page_ptr_p; + Ptr<Page> pagePtr = req_struct->m_varpart_page_ptr; + Var_page* pageP= (Var_page*)pagePtr.p; Uint32 idx, alloc, needed; - if(! (bits & Tuple_header::CHAINED_ROW)) - { - idx= regOperPtr->m_tuple_location.m_page_idx; - alloc= pageP->get_entry_len(idx) & ~Var_page::CHAIN; - ndbassert(!(pageP->get_entry_len(idx) & Var_page::CHAIN)); - needed= sizes[2+MM]; - } - else + Uint32 *refptr = org->get_var_part_ptr(regTabPtr); + ndbassert(bits & Tuple_header::CHAINED_ROW); + + Local_key ref; + ref.assref(*refptr); + idx= ref.m_page_idx; + if (! 
(copy_bits & Tuple_header::CHAINED_ROW)) { - Local_key tmp; - tmp.assref(*org->get_var_part_ptr(regTabPtr)); - idx= tmp.m_page_idx; - alloc= pageP->get_entry_len(idx) & ~Var_page::CHAIN; - if(!(pageP->get_entry_len(idx) & Var_page::CHAIN)) - ndbout << *pageP << endl; - ndbassert(pageP->get_entry_len(idx) & Var_page::CHAIN); - needed= sizes[2+MM] - fix_sz; + c_page_pool.getPtr(pagePtr, ref.m_page_no); + pageP = (Var_page*)pagePtr.p; } + alloc= pageP->get_entry_len(idx); +#ifdef VM_TRACE + if(!pageP->get_entry_chain(idx)) + ndbout << *pageP << endl; +#endif + ndbassert(pageP->get_entry_chain(idx)); + needed= sizes[2+MM] - fix_sz; if(needed <= alloc) { @@ -2911,88 +2877,348 @@ Dbtup::handle_size_change_after_update(KeyReqStruct* req_struct, ndbout_c(" no grow"); return 0; } - Uint32 add= needed - alloc; copy_bits |= Tuple_header::MM_GROWN; + if (unlikely(realloc_var_part(regFragPtr, regTabPtr, pagePtr, + (Var_part_ref*)refptr, alloc, needed))) + return -1; + } + req_struct->m_tuple_ptr->m_header_bits = copy_bits; + return 0; +} + +int +Dbtup::nr_update_gci(Uint32 fragPtrI, const Local_key* key, Uint32 gci) +{ + FragrecordPtr fragPtr; + fragPtr.i= fragPtrI; + ptrCheckGuard(fragPtr, cnoOfFragrec, fragrecord); + TablerecPtr tablePtr; + tablePtr.i= fragPtr.p->fragTableId; + ptrCheckGuard(tablePtr, cnoOfTablerec, tablerec); + + if (tablePtr.p->m_bits & Tablerec::TR_RowGCI) + { + Local_key tmp = *key; + PagePtr page_ptr; + + int ret; + if (tablePtr.p->m_attributes[MM].m_no_of_varsize) + { + tablePtr.p->m_offsets[MM].m_fix_header_size += + Tuple_header::HeaderSize+1; + ret = alloc_page(tablePtr.p, fragPtr.p, &page_ptr, tmp.m_page_no); + tablePtr.p->m_offsets[MM].m_fix_header_size -= + Tuple_header::HeaderSize+1; + } + else + { + ret = alloc_page(tablePtr.p, fragPtr.p, &page_ptr, tmp.m_page_no); + } + + if (ret) + return -1; + + Tuple_header* ptr = (Tuple_header*) + ((Fix_page*)page_ptr.p)->get_ptr(tmp.m_page_idx, 0); - if(pageP->free_space >= add) + 
ndbrequire(ptr->m_header_bits & Tuple_header::FREE); + *ptr->get_mm_gci(tablePtr.p) = gci; + } + return 0; +} + +int +Dbtup::nr_read_pk(Uint32 fragPtrI, + const Local_key* key, Uint32* dst, bool& copy) +{ + + FragrecordPtr fragPtr; + fragPtr.i= fragPtrI; + ptrCheckGuard(fragPtr, cnoOfFragrec, fragrecord); + TablerecPtr tablePtr; + tablePtr.i= fragPtr.p->fragTableId; + ptrCheckGuard(tablePtr, cnoOfTablerec, tablerec); + + Local_key tmp = *key; + Uint32 pages = fragPtr.p->noOfPages; + + int ret; + PagePtr page_ptr; + if (tablePtr.p->m_attributes[MM].m_no_of_varsize) + { + tablePtr.p->m_offsets[MM].m_fix_header_size += Tuple_header::HeaderSize+1; + ret = alloc_page(tablePtr.p, fragPtr.p, &page_ptr, tmp.m_page_no); + tablePtr.p->m_offsets[MM].m_fix_header_size -= Tuple_header::HeaderSize+1; + } + else + { + ret = alloc_page(tablePtr.p, fragPtr.p, &page_ptr, tmp.m_page_no); + } + if (ret) + return -1; + + KeyReqStruct req_struct; + Uint32* ptr= ((Fix_page*)page_ptr.p)->get_ptr(key->m_page_idx, 0); + + req_struct.m_page_ptr = page_ptr; + req_struct.m_tuple_ptr = (Tuple_header*)ptr; + Uint32 bits = req_struct.m_tuple_ptr->m_header_bits; + + ret = 0; + copy = false; + if (! (bits & Tuple_header::FREE)) + { + if (bits & Tuple_header::ALLOC) { - jam(); - if(!pageP->is_space_behind_entry(idx, add)) - { - if(0) printf("extra reorg"); - jam(); - /** - * In this case we need to reorganise the page to fit. To ensure we - * don't complicate matters we make a little trick here where we - * fool the reorg_page to avoid copying the entry at hand and copy - * that separately at the end. This means we need to copy it out of - * the page before reorg_page to save the entry contents. 
- */ - Uint32* copyBuffer= cinBuffer; - memcpy(copyBuffer, pageP->get_ptr(idx), 4*alloc); - pageP->set_entry_len(idx, 0); - pageP->free_space += alloc; - pageP->reorg((Var_page*)ctemp_page); - memcpy(pageP->get_free_space_ptr(), copyBuffer, 4*alloc); - pageP->set_entry_offset(idx, pageP->insert_pos); - add += alloc; + Uint32 opPtrI= req_struct.m_tuple_ptr->m_operation_ptr_i; + Operationrec* opPtrP= c_operation_pool.getPtr(opPtrI); + ndbassert(!opPtrP->m_copy_tuple_location.isNull()); + req_struct.m_tuple_ptr= (Tuple_header*) + c_undo_buffer.get_ptr(&opPtrP->m_copy_tuple_location); + copy = true; + } + req_struct.check_offset[MM]= tablePtr.p->get_check_offset(MM); + req_struct.check_offset[DD]= tablePtr.p->get_check_offset(DD); + + Uint32 num_attr= tablePtr.p->m_no_of_attributes; + Uint32 descr_start= tablePtr.p->tabDescriptor; + TableDescriptor *tab_descr= &tableDescriptor[descr_start]; + ndbrequire(descr_start + (num_attr << ZAD_LOG_SIZE) <= cnoOfTabDescrRec); + req_struct.attr_descr= tab_descr; + + if (tablePtr.p->need_expand()) + prepare_read(&req_struct, tablePtr.p, false); + + const Uint32* attrIds= &tableDescriptor[tablePtr.p->readKeyArray].tabDescr; + const Uint32 numAttrs= tablePtr.p->noOfKeyAttr; + // read pk attributes from original tuple + + // new globals + tabptr= tablePtr; + fragptr= fragPtr; + operPtr.i= RNIL; + operPtr.p= NULL; + + // do it + ret = readAttributes(&req_struct, + attrIds, + numAttrs, + dst, + ZNIL, false); + + // done + if (likely(ret != -1)) { + // remove headers + Uint32 n= 0; + Uint32 i= 0; + while (n < numAttrs) { + const AttributeHeader ah(dst[i]); + Uint32 size= ah.getDataSize(); + ndbrequire(size != 0); + for (Uint32 j= 0; j < size; j++) { + dst[i + j - n]= dst[i + j + 1]; + } + n+= 1; + i+= 1 + size; } - pageP->grow_entry(idx, add); - ndbassert((pageP->get_entry_len(idx) & Var_page::CHAIN) == - (bits & Tuple_header::CHAINED_ROW ? 
Var_page::CHAIN : 0)); - update_free_page_list(regFragPtr, pageP); + ndbrequire((int)i == ret); + ret -= numAttrs; + } else { + return terrorCode ? (-(int)terrorCode) : -1; } - else + } + + if (tablePtr.p->m_bits & Tablerec::TR_RowGCI) + { + dst[ret] = *req_struct.m_tuple_ptr->get_mm_gci(tablePtr.p); + } + else + { + dst[ret] = 0; + } + return ret; +} + +#include <signaldata/TuxMaint.hpp> + +int +Dbtup::nr_delete(Signal* signal, Uint32 senderData, + Uint32 fragPtrI, const Local_key* key, Uint32 gci) +{ + FragrecordPtr fragPtr; + fragPtr.i= fragPtrI; + ptrCheckGuard(fragPtr, cnoOfFragrec, fragrecord); + TablerecPtr tablePtr; + tablePtr.i= fragPtr.p->fragTableId; + ptrCheckGuard(tablePtr, cnoOfTablerec, tablerec); + + Local_key tmp = * key; + tmp.m_page_no= getRealpid(fragPtr.p, tmp.m_page_no); + + PagePtr pagePtr; + Tuple_header* ptr= (Tuple_header*)get_ptr(&pagePtr, &tmp, tablePtr.p); + + if (!tablePtr.p->tuxCustomTriggers.isEmpty()) + { + jam(); + TuxMaintReq* req = (TuxMaintReq*)signal->getDataPtrSend(); + req->tableId = fragPtr.p->fragTableId; + req->fragId = fragPtr.p->fragmentId; + req->pageId = tmp.m_page_no; + req->pageIndex = tmp.m_page_idx; + req->tupVersion = ptr->get_tuple_version(); + req->opInfo = TuxMaintReq::OpRemove; + removeTuxEntries(signal, tablePtr.p); + } + + Local_key disk; + memcpy(&disk, ptr->get_disk_ref_ptr(tablePtr.p), sizeof(disk)); + + if (tablePtr.p->m_attributes[MM].m_no_of_varsize) + { + jam(); + free_var_rec(fragPtr.p, tablePtr.p, &tmp, pagePtr); + } else { + jam(); + free_fix_rec(fragPtr.p, tablePtr.p, &tmp, (Fix_page*)pagePtr.p); + } + + if (tablePtr.p->m_no_of_disk_attributes) + { + jam(); + + Uint32 sz = (sizeof(Dbtup::Disk_undo::Free) >> 2) + + tablePtr.p->m_offsets[DD].m_fix_header_size - 1; + + int res = c_lgman->alloc_log_space(fragPtr.p->m_logfile_group_id, sz); + ndbrequire(res == 0); + + /** + * 1) alloc log buffer + * 2) get page + * 3) get log buffer + * 4) delete tuple + */ + Page_cache_client::Request preq; + 
preq.m_page = disk; + preq.m_callback.m_callbackData = senderData; + preq.m_callback.m_callbackFunction = + safe_cast(&Dbtup::nr_delete_page_callback); + int flags = Page_cache_client::COMMIT_REQ | + Page_cache_client::STRICT_ORDER; + res = m_pgman.get_page(signal, preq, flags); + if (res == 0) { - Local_key key; + goto timeslice; + } + else if (unlikely(res == -1)) + { + return -1; + } - if(! (bits & Tuple_header::CHAINED_ROW)) - { - assert(fix_sz < alloc); - org->m_header_bits |= Tuple_header::CHAINED_ROW; - Uint32 id, *dst= alloc_var_rec(regFragPtr, regTabPtr, - needed - fix_sz, &key, &id, - Var_page::CHAIN); - assert(dst); - ndbassert(key.m_page_no != pageP->physical_page_id); - ndbassert(pageP->get_ptr(idx) == (Uint32*)org); - Uint32 *ptr= org->get_var_part_ptr(regTabPtr); - - Uint32 old= pageP->get_entry_len(idx); - memcpy(dst, ptr, 4*(old - fix_sz)); - * ptr = key.ref(); // store ref - - ndbassert((ptr - (Uint32*)org) + 1 == fix_sz + 1); - pageP->shrink_entry(idx, fix_sz + 1); // var part ref - //ndbout_c("%p->shrink_entry(%d, %d)", pageP, idx, fix_sz + 1); - update_free_page_list(regFragPtr, pageP); - } - else - { - assert(sizes[2+MM] >= alloc); - Uint32 id, *dst= alloc_var_rec(regFragPtr, regTabPtr, - needed, &key, &id, - Var_page::CHAIN); - assert(dst); - ndbassert(key.m_page_no != pageP->physical_page_id); - - // Alloc var_rec can reorg base page, so we need to refetch ptr - Uint32 base_idx= regOperPtr->m_tuple_location.m_page_idx; - org= (Tuple_header*) - ((Var_page*)req_struct->m_page_ptr_p)->get_ptr(base_idx); - Uint32 *ref= org->get_var_part_ptr(regTabPtr); - Uint32 old_ref= *ref; - Uint32 *src= pageP->get_ptr(idx); - - assert(alloc < needed); - memcpy(dst, src, 4*alloc); - *ref = key.ref(); - - free_var_part(regFragPtr, regTabPtr, - *(Var_part_ref*)&old_ref, Var_page::CHAIN); - } + PagePtr disk_page = *(PagePtr*)&m_pgman.m_ptr; + + preq.m_callback.m_callbackFunction = + safe_cast(&Dbtup::nr_delete_logbuffer_callback); + Logfile_client lgman(this, 
c_lgman, fragPtr.p->m_logfile_group_id); + res= lgman.get_log_buffer(signal, sz, &preq.m_callback); + switch(res){ + case 0: + signal->theData[2] = disk_page.i; + goto timeslice; + case -1: + ndbrequire("NOT YET IMPLEMENTED" == 0); + break; } + + ndbout << "DIRECT DISK DELETE: " << disk << endl; + disk_page_free(signal, tablePtr.p, fragPtr.p, + &disk, *(PagePtr*)&disk_page, gci); + return 0; } - req_struct->m_tuple_ptr->m_header_bits = copy_bits; + return 0; + +timeslice: + memcpy(signal->theData, &disk, sizeof(disk)); + return 1; +} + +void +Dbtup::nr_delete_page_callback(Signal* signal, + Uint32 userpointer, Uint32 page_id) +{ + Ptr<GlobalPage> gpage; + m_global_page_pool.getPtr(gpage, page_id); + PagePtr pagePtr= *(PagePtr*)&gpage; + + Dblqh::Nr_op_info op; + op.m_ptr_i = userpointer; + op.m_disk_ref.m_page_no = pagePtr.p->m_page_no; + op.m_disk_ref.m_file_no = pagePtr.p->m_file_no; + c_lqh->get_nr_op_info(&op, page_id); + + Ptr<Fragrecord> fragPtr; + fragPtr.i= op.m_tup_frag_ptr_i; + ptrCheckGuard(fragPtr, cnoOfFragrec, fragrecord); + + Ptr<Tablerec> tablePtr; + tablePtr.i = fragPtr.p->fragTableId; + ptrCheckGuard(tablePtr, cnoOfTablerec, tablerec); + + Uint32 sz = (sizeof(Dbtup::Disk_undo::Free) >> 2) + + tablePtr.p->m_offsets[DD].m_fix_header_size - 1; + + Callback cb; + cb.m_callbackData = userpointer; + cb.m_callbackFunction = + safe_cast(&Dbtup::nr_delete_logbuffer_callback); + Logfile_client lgman(this, c_lgman, fragPtr.p->m_logfile_group_id); + int res= lgman.get_log_buffer(signal, sz, &cb); + switch(res){ + case 0: + return; + case -1: + ndbrequire("NOT YET IMPLEMENTED" == 0); + break; + } + + ndbout << "PAGE CALLBACK DISK DELETE: " << op.m_disk_ref << endl; + disk_page_free(signal, tablePtr.p, fragPtr.p, + &op.m_disk_ref, pagePtr, op.m_gci); + + c_lqh->nr_delete_complete(signal, &op); + return; +} + +void +Dbtup::nr_delete_logbuffer_callback(Signal* signal, + Uint32 userpointer, + Uint32 unused) +{ + Dblqh::Nr_op_info op; + op.m_ptr_i = userpointer; + 
c_lqh->get_nr_op_info(&op, RNIL); + + Ptr<Fragrecord> fragPtr; + fragPtr.i= op.m_tup_frag_ptr_i; + ptrCheckGuard(fragPtr, cnoOfFragrec, fragrecord); + + Ptr<Tablerec> tablePtr; + tablePtr.i = fragPtr.p->fragTableId; + ptrCheckGuard(tablePtr, cnoOfTablerec, tablerec); + + Ptr<GlobalPage> gpage; + m_global_page_pool.getPtr(gpage, op.m_page_id); + PagePtr pagePtr= *(PagePtr*)&gpage; + + /** + * reset page no + */ + ndbout << "LOGBUFFER CALLBACK DISK DELETE: " << op.m_disk_ref << endl; + + disk_page_free(signal, tablePtr.p, fragPtr.p, + &op.m_disk_ref, pagePtr, op.m_gci); + + c_lqh->nr_delete_complete(signal, &op); } diff --git a/storage/ndb/src/kernel/blocks/dbtup/DbtupFixAlloc.cpp b/storage/ndb/src/kernel/blocks/dbtup/DbtupFixAlloc.cpp index e37b493f1e1..f90ef9d3a99 100644 --- a/storage/ndb/src/kernel/blocks/dbtup/DbtupFixAlloc.cpp +++ b/storage/ndb/src/kernel/blocks/dbtup/DbtupFixAlloc.cpp @@ -73,7 +73,7 @@ Dbtup::alloc_fix_rec(Fragrecord* const regFragPtr, /* FAILED. TRY ALLOCATING FROM NORMAL PAGE. */ /* ---------------------------------------------------------------- */ PagePtr pagePtr; - pagePtr.i= regFragPtr->thFreeFirst; + pagePtr.i = regFragPtr->thFreeFirst.firstItem; if (pagePtr.i == RNIL) { /* ---------------------------------------------------------------- */ // No prepared tuple header page with free entries exists. @@ -85,14 +85,16 @@ Dbtup::alloc_fix_rec(Fragrecord* const regFragPtr, // We found empty pages on the fragment. Allocate an empty page and // convert it into a tuple header page and put it in thFreeFirst-list. 
/* ---------------------------------------------------------------- */ - ptrCheckGuard(pagePtr, cnoOfPage, cpage); + c_page_pool.getPtr(pagePtr); + + ndbassert(pagePtr.p->page_state == ZEMPTY_MM); - convertThPage(regTabPtr->m_offsets[MM].m_fix_header_size, - (Fix_page*)pagePtr.p); + convertThPage((Fix_page*)pagePtr.p, regTabPtr, MM); - pagePtr.p->next_page = regFragPtr->thFreeFirst; pagePtr.p->page_state = ZTH_MM_FREE; - regFragPtr->thFreeFirst = pagePtr.i; + + LocalDLList<Page> free_pages(c_page_pool, regFragPtr->thFreeFirst); + free_pages.add(pagePtr); } else { ljam(); /* ---------------------------------------------------------------- */ @@ -106,7 +108,7 @@ Dbtup::alloc_fix_rec(Fragrecord* const regFragPtr, /* THIS SHOULD BE THE COMMON PATH THROUGH THE CODE, FREE */ /* COPY PAGE EXISTED. */ /* ---------------------------------------------------------------- */ - ptrCheckGuard(pagePtr, cnoOfPage, cpage); + c_page_pool.getPtr(pagePtr); } Uint32 page_offset= alloc_tuple_from_page(regFragPtr, (Fix_page*)pagePtr.p); @@ -117,10 +119,11 @@ Dbtup::alloc_fix_rec(Fragrecord* const regFragPtr, return pagePtr.p->m_data + page_offset; } -void Dbtup::convertThPage(Uint32 Tupheadsize, - Fix_page* const regPagePtr) +void Dbtup::convertThPage(Fix_page* regPagePtr, + Tablerec* regTabPtr, + Uint32 mm) { - Uint32 nextTuple = Tupheadsize; + Uint32 nextTuple = regTabPtr->m_offsets[mm].m_fix_header_size; Uint32 endOfList; /* ASSUMES AT LEAST ONE TUPLE HEADER FITS AND THEREFORE NO HANDLING @@ -132,10 +135,19 @@ void Dbtup::convertThPage(Uint32 Tupheadsize, #ifdef VM_TRACE memset(regPagePtr->m_data, 0xF1, 4*Fix_page::DATA_WORDS); #endif + Uint32 gci_pos = 2; + Uint32 gci_val = 0xF1F1F1F1; + if (regTabPtr->m_bits & Tablerec::TR_RowGCI) + { + Tuple_header* ptr = 0; + gci_pos = ptr->get_mm_gci(regTabPtr) - (Uint32*)ptr; + gci_val = 0; + } while (pos + nextTuple <= Fix_page::DATA_WORDS) { regPagePtr->m_data[pos] = (prev << 16) | (pos + nextTuple); - regPagePtr->m_data[pos + 1] = 
Tuple_header::FREE; + regPagePtr->m_data[pos + 1] = Fix_page::FREE_RECORD; + regPagePtr->m_data[pos + gci_pos] = gci_val; prev = pos; pos += nextTuple; cnt ++; @@ -151,6 +163,7 @@ Uint32 Dbtup::alloc_tuple_from_page(Fragrecord* const regFragPtr, Fix_page* const regPagePtr) { + ndbassert(regPagePtr->free_space); Uint32 idx= regPagePtr->alloc_record(); if(regPagePtr->free_space == 0) { @@ -164,8 +177,8 @@ Dbtup::alloc_tuple_from_page(Fragrecord* const regFragPtr, /* ARE MAINTAINED EVEN AFTER A SYSTEM CRASH. */ /* ---------------------------------------------------------------- */ ndbrequire(regPagePtr->page_state == ZTH_MM_FREE); - regFragPtr->thFreeFirst = regPagePtr->next_page; - regPagePtr->next_page = RNIL; + LocalDLList<Page> free_pages(c_page_pool, regFragPtr->thFreeFirst); + free_pages.remove((Page*)regPagePtr); regPagePtr->page_state = ZTH_MM_FULL; } @@ -183,10 +196,92 @@ void Dbtup::free_fix_rec(Fragrecord* regFragPtr, if(free == 1) { ljam(); + PagePtr pagePtr = { (Page*)regPagePtr, key->m_page_no }; + LocalDLList<Page> free_pages(c_page_pool, regFragPtr->thFreeFirst); ndbrequire(regPagePtr->page_state == ZTH_MM_FULL); regPagePtr->page_state = ZTH_MM_FREE; - regPagePtr->next_page= regFragPtr->thFreeFirst; - regFragPtr->thFreeFirst = key->m_page_no; + free_pages.add(pagePtr); } }//Dbtup::freeTh() + +int +Dbtup::alloc_page(Tablerec* tabPtrP, Fragrecord* fragPtrP, + PagePtr * ret, Uint32 page_no) +{ + Uint32 pages = fragPtrP->noOfPages; + + if (page_no >= pages) + { + Uint32 start = pages; + while(page_no >= pages) + pages += (pages >> 3) + (pages >> 4) + 2; + allocFragPages(fragPtrP, pages - start); + if (page_no >= (pages = fragPtrP->noOfPages)) + { + terrorCode = ZMEM_NOMEM_ERROR; + return 1; + } + } + + PagePtr pagePtr; + c_page_pool.getPtr(pagePtr, getRealpid(fragPtrP, page_no)); + + LocalDLList<Page> alloc_pages(c_page_pool, fragPtrP->emptyPrimPage); + LocalDLList<Page> free_pages(c_page_pool, fragPtrP->thFreeFirst); + if (pagePtr.p->page_state == 
ZEMPTY_MM) + { + convertThPage((Fix_page*)pagePtr.p, tabPtrP, MM); + pagePtr.p->page_state = ZTH_MM_FREE; + alloc_pages.remove(pagePtr); + free_pages.add(pagePtr); + } + + *ret = pagePtr; + return 0; +} + +Uint32* +Dbtup::alloc_fix_rowid(Fragrecord* const regFragPtr, + Tablerec* const regTabPtr, + Local_key* key, + Uint32 * out_frag_page_id) +{ + Uint32 page_no = key->m_page_no; + Uint32 idx= key->m_page_idx; + + PagePtr pagePtr; + if (alloc_page(regTabPtr, regFragPtr, &pagePtr, page_no)) + { + terrorCode = ZMEM_NOMEM_ERROR; + return 0; + } + + Uint32 state = pagePtr.p->page_state; + LocalDLList<Page> free_pages(c_page_pool, regFragPtr->thFreeFirst); + switch(state){ + case ZTH_MM_FREE: + if (((Fix_page*)pagePtr.p)->alloc_record(idx) != idx) + { + terrorCode = ZROWID_ALLOCATED; + return 0; + } + + if(pagePtr.p->free_space == 0) + { + jam(); + pagePtr.p->page_state = ZTH_MM_FULL; + free_pages.remove(pagePtr); + } + + *out_frag_page_id= page_no; + key->m_page_no = pagePtr.i; + key->m_page_idx = idx; + return pagePtr.p->m_data + idx; + case ZTH_MM_FULL: + terrorCode = ZROWID_ALLOCATED; + return 0; + case ZEMPTY_MM: + ndbrequire(false); + } +} diff --git a/storage/ndb/src/kernel/blocks/dbtup/DbtupGen.cpp b/storage/ndb/src/kernel/blocks/dbtup/DbtupGen.cpp index 694237c85be..94fc9387a1e 100644 --- a/storage/ndb/src/kernel/blocks/dbtup/DbtupGen.cpp +++ b/storage/ndb/src/kernel/blocks/dbtup/DbtupGen.cpp @@ -42,7 +42,6 @@ void Dbtup::initData() { cnoOfAttrbufrec = ZNO_OF_ATTRBUFREC; cnoOfFragrec = MAX_FRAG_PER_NODE; - cnoOfPage = ZNO_OF_PAGE; cnoOfFragoprec = MAX_FRAG_PER_NODE; cnoOfPageRangeRec = ZNO_OF_PAGE_RANGE_REC; c_maxTriggersPerTable = ZDEFAULT_MAX_NO_TRIGGERS_PER_TABLE; @@ -55,11 +54,11 @@ void Dbtup::initData() Dbtup::Dbtup(const class Configuration & conf, Pgman* pgman) : SimulatedBlock(DBTUP, conf), c_lqh(0), + m_pgman(this, pgman), + c_extent_hash(c_extent_pool), c_storedProcPool(), c_buildIndexList(c_buildIndexPool), - c_undo_buffer(this), - m_pgman(this, 
pgman), - c_extent_hash(c_extent_pool) + c_undo_buffer(this) { BLOCK_CONSTRUCTOR(Dbtup); @@ -101,12 +100,14 @@ Dbtup::Dbtup(const class Configuration & conf, Pgman* pgman) addRecSignal(GSN_ACC_SCANREQ, &Dbtup::execACC_SCANREQ); addRecSignal(GSN_NEXT_SCANREQ, &Dbtup::execNEXT_SCANREQ); addRecSignal(GSN_ACC_CHECK_SCAN, &Dbtup::execACC_CHECK_SCAN); + addRecSignal(GSN_ACCKEYCONF, &Dbtup::execACCKEYCONF); + addRecSignal(GSN_ACCKEYREF, &Dbtup::execACCKEYREF); + addRecSignal(GSN_ACC_ABORTCONF, &Dbtup::execACC_ABORTCONF); attrbufrec = 0; fragoperrec = 0; fragrecord = 0; hostBuffer = 0; - cpage = 0; pageRange = 0; tablerec = 0; tableDescriptor = 0; @@ -135,10 +136,6 @@ Dbtup::~Dbtup() sizeof(HostBuffer), MAX_NODES); - deallocRecord((void **)&cpage,"Page", - sizeof(Page), - cnoOfPage); - deallocRecord((void **)&pageRange,"PageRange", sizeof(PageRange), cnoOfPageRangeRec); @@ -173,7 +170,7 @@ void Dbtup::execCONTINUEB(Signal* signal) case ZREPORT_MEMORY_USAGE:{ ljam(); static int c_currentMemUsed = 0; - int now = (cnoOfAllocatedPages * 100)/cnoOfPage; + int now = (cnoOfAllocatedPages * 100)/c_page_pool.getSize(); const int thresholds[] = { 100, 90, 80, 0 }; Uint32 i = 0; @@ -197,6 +194,14 @@ void Dbtup::execCONTINUEB(Signal* signal) ljam(); buildIndex(signal, dataPtr); break; + case ZTUP_SCAN: + ljam(); + { + ScanOpPtr scanPtr; + c_scanOpPool.getPtr(scanPtr, dataPtr); + scanCont(signal, scanPtr); + } + return; case ZFREE_EXTENT: { ljam(); @@ -279,7 +284,6 @@ void Dbtup::execREAD_CONFIG_REQ(Signal* signal) ndbrequire(p != 0); ndbrequire(!ndb_mgm_get_int_parameter(p, CFG_TUP_FRAG, &cnoOfFragrec)); - ndbrequire(!ndb_mgm_get_int_parameter(p, CFG_TUP_PAGE, &cnoOfPage)); Uint32 noOfTriggers= 0; @@ -310,6 +314,9 @@ void Dbtup::execREAD_CONFIG_REQ(Signal* signal) Uint32 nScanOp; // use TUX config for now ndbrequire(!ndb_mgm_get_int_parameter(p, CFG_TUX_SCAN_OP, &nScanOp)); c_scanOpPool.setSize(nScanOp + 1); + Uint32 nScanBatch; + ndbrequire(!ndb_mgm_get_int_parameter(p, 
CFG_DB_BATCH_SIZE, &nScanBatch)); + c_scanLockPool.setSize(nScanOp * nScanBatch); ScanOpPtr lcp; ndbrequire(c_scanOpPool.seize(lcp)); @@ -326,12 +333,16 @@ void Dbtup::execREAD_CONFIG_REQ(Signal* signal) void Dbtup::initRecords() { unsigned i; + Uint32 tmp; + const ndb_mgm_configuration_iterator * p = + theConfiguration.getOwnConfigIterator(); + ndbrequire(p != 0); + + ndbrequire(!ndb_mgm_get_int_parameter(p, CFG_TUP_PAGE, &tmp)); // Records with dynamic sizes - cpage = (Page*)allocRecord("Page", - sizeof(Page), - cnoOfPage, - false); + Page* ptr =(Page*)allocRecord("Page", sizeof(Page), tmp, false); + c_page_pool.set(ptr, tmp); attrbufrec = (Attrbufrec*)allocRecord("Attrbufrec", sizeof(Attrbufrec), @@ -353,10 +364,6 @@ void Dbtup::initRecords() sizeof(TableDescriptor), cnoOfTabDescrRec); - Uint32 tmp; - const ndb_mgm_configuration_iterator * p = - theConfiguration.getOwnConfigIterator(); - ndbrequire(p != 0); ndbrequire(!ndb_mgm_get_int_parameter(p, CFG_TUP_OP_RECS, &tmp)); c_operation_pool.setSize(tmp); @@ -531,6 +538,7 @@ void Dbtup::initializeFragrecord() for (regFragPtr.i = 0; regFragPtr.i < cnoOfFragrec; regFragPtr.i++) { refresh_watch_dog(); ptrAss(regFragPtr, fragrecord); + new (regFragPtr.p) Fragrecord(); regFragPtr.p->nextfreefrag = regFragPtr.i + 1; regFragPtr.p->fragStatus = IDLE; }//for @@ -582,7 +590,7 @@ Dbtup::initTab(Tablerec* const regTabPtr) regTabPtr->tabDescriptor = RNIL; regTabPtr->readKeyArray = RNIL; - regTabPtr->checksumIndicator = false; + regTabPtr->m_bits = 0; regTabPtr->m_no_of_attributes = 0; regTabPtr->noOfKeyAttr = 0; diff --git a/storage/ndb/src/kernel/blocks/dbtup/DbtupIndex.cpp b/storage/ndb/src/kernel/blocks/dbtup/DbtupIndex.cpp index a70b3739270..a9cf39b28f9 100644 --- a/storage/ndb/src/kernel/blocks/dbtup/DbtupIndex.cpp +++ b/storage/ndb/src/kernel/blocks/dbtup/DbtupIndex.cpp @@ -37,8 +37,7 @@ Dbtup::tuxGetTupAddr(Uint32 fragPtrI, { ljamEntry(); PagePtr pagePtr; - pagePtr.i= pageId; - ptrCheckGuard(pagePtr, cnoOfPage, cpage); + 
c_page_pool.getPtr(pagePtr, pageId); Uint32 fragPageId= pagePtr.p->frag_page_id; tupAddr= (fragPageId << MAX_TUPLES_BITS) | pageIndex; } @@ -115,8 +114,7 @@ Dbtup::tuxGetNode(Uint32 fragPtrI, tablePtr.i= fragPtr.p->fragTableId; ptrCheckGuard(tablePtr, cnoOfTablerec, tablerec); PagePtr pagePtr; - pagePtr.i= pageId; - ptrCheckGuard(pagePtr, cnoOfPage, cpage); + c_page_pool.getPtr(pagePtr, pageId); Uint32 attrDescIndex= tablePtr.p->tabDescriptor + (0 << ZAD_LOG_SIZE); Uint32 attrDataOffset= AttributeOffset::getOffset( tableDescriptor[attrDescIndex + 1].tabDescr); @@ -219,62 +217,88 @@ Dbtup::tuxReadPk(Uint32 fragPtrI, Uint32 pageId, Uint32 pageIndex, Uint32* dataO tmpOp.m_tuple_location.m_page_idx= pageIndex; KeyReqStruct req_struct; - setup_fixed_part(&req_struct, &tmpOp, tablePtr.p); - if(req_struct.m_tuple_ptr->m_header_bits & Tuple_header::ALLOC) + + PagePtr page_ptr; + Uint32* ptr= get_ptr(&page_ptr, &tmpOp.m_tuple_location, tablePtr.p); + req_struct.m_page_ptr = page_ptr; + req_struct.m_tuple_ptr = (Tuple_header*)ptr; + + int ret = 0; + if (! 
(req_struct.m_tuple_ptr->m_header_bits & Tuple_header::FREE)) { - Uint32 opPtrI= req_struct.m_tuple_ptr->m_operation_ptr_i; - Operationrec* opPtrP= c_operation_pool.getPtr(opPtrI); - ndbassert(!opPtrP->m_copy_tuple_location.isNull()); - req_struct.m_tuple_ptr= (Tuple_header*) - c_undo_buffer.get_ptr(&opPtrP->m_copy_tuple_location); - } - prepare_read(&req_struct, tablePtr.p, false); - - const Uint32* attrIds= &tableDescriptor[tablePtr.p->readKeyArray].tabDescr; - const Uint32 numAttrs= tablePtr.p->noOfKeyAttr; - // read pk attributes from original tuple - - // save globals - TablerecPtr tabptr_old= tabptr; - FragrecordPtr fragptr_old= fragptr; - OperationrecPtr operPtr_old= operPtr; - - // new globals - tabptr= tablePtr; - fragptr= fragPtr; - operPtr.i= RNIL; - operPtr.p= NULL; + req_struct.check_offset[MM]= tablePtr.p->get_check_offset(MM); + req_struct.check_offset[DD]= tablePtr.p->get_check_offset(DD); + + Uint32 num_attr= tablePtr.p->m_no_of_attributes; + Uint32 descr_start= tablePtr.p->tabDescriptor; + TableDescriptor *tab_descr= &tableDescriptor[descr_start]; + ndbrequire(descr_start + (num_attr << ZAD_LOG_SIZE) <= cnoOfTabDescrRec); + req_struct.attr_descr= tab_descr; - // do it - int ret = readAttributes(&req_struct, - attrIds, - numAttrs, - dataOut, - ZNIL, - xfrmFlag); - // restore globals - tabptr= tabptr_old; - fragptr= fragptr_old; - operPtr= operPtr_old; - // done - if (ret != -1) { - // remove headers - Uint32 n= 0; - Uint32 i= 0; - while (n < numAttrs) { - const AttributeHeader ah(dataOut[i]); - Uint32 size= ah.getDataSize(); - ndbrequire(size != 0); - for (Uint32 j= 0; j < size; j++) { - dataOut[i + j - n]= dataOut[i + j + 1]; + if(req_struct.m_tuple_ptr->m_header_bits & Tuple_header::ALLOC) + { + Uint32 opPtrI= req_struct.m_tuple_ptr->m_operation_ptr_i; + Operationrec* opPtrP= c_operation_pool.getPtr(opPtrI); + ndbassert(!opPtrP->m_copy_tuple_location.isNull()); + req_struct.m_tuple_ptr= (Tuple_header*) + 
c_undo_buffer.get_ptr(&opPtrP->m_copy_tuple_location); + } + prepare_read(&req_struct, tablePtr.p, false); + + const Uint32* attrIds= &tableDescriptor[tablePtr.p->readKeyArray].tabDescr; + const Uint32 numAttrs= tablePtr.p->noOfKeyAttr; + // read pk attributes from original tuple + + // save globals + TablerecPtr tabptr_old= tabptr; + FragrecordPtr fragptr_old= fragptr; + OperationrecPtr operPtr_old= operPtr; + + // new globals + tabptr= tablePtr; + fragptr= fragPtr; + operPtr.i= RNIL; + operPtr.p= NULL; + + // do it + ret = readAttributes(&req_struct, + attrIds, + numAttrs, + dataOut, + ZNIL, + xfrmFlag); + // restore globals + tabptr= tabptr_old; + fragptr= fragptr_old; + operPtr= operPtr_old; + // done + if (ret != -1) { + // remove headers + Uint32 n= 0; + Uint32 i= 0; + while (n < numAttrs) { + const AttributeHeader ah(dataOut[i]); + Uint32 size= ah.getDataSize(); + ndbrequire(size != 0); + for (Uint32 j= 0; j < size; j++) { + dataOut[i + j - n]= dataOut[i + j + 1]; + } + n+= 1; + i+= 1 + size; } - n+= 1; - i+= 1 + size; + ndbrequire((int)i == ret); + ret -= numAttrs; + } else { + ret= terrorCode ? (-(int)terrorCode) : -1; } - ndbrequire((int)i == ret); - ret -= numAttrs; - } else { - ret= terrorCode ? (-(int)terrorCode) : -1; + } + if (tablePtr.p->m_bits & Tablerec::TR_RowGCI) + { + dataOut[ret] = *req_struct.m_tuple_ptr->get_mm_gci(tablePtr.p); + } + else + { + dataOut[ret] = 0; } return ret; } @@ -454,7 +478,9 @@ Dbtup::buildIndex(Signal* signal, Uint32 buildPtrI) tablePtr.i= buildReq->getTableId(); ptrCheckGuard(tablePtr, cnoOfTablerec, tablerec); - const Uint32 firstTupleNo = ! buildPtr.p->m_build_vs ? 0 : 1; + const Uint32 firstTupleNo = 0; + const Uint32 tupheadsize = tablePtr.p->m_offsets[MM].m_fix_header_size + + (buildPtr.p->m_build_vs ? 
Tuple_header::HeaderSize + 1: 0); #ifdef TIME_MEASUREMENT MicroSecondTimer start; @@ -491,8 +517,7 @@ Dbtup::buildIndex(Signal* signal, Uint32 buildPtrI) break; } Uint32 realPageId= getRealpid(fragPtr.p, buildPtr.p->m_pageId); - pagePtr.i= realPageId; - ptrCheckGuard(pagePtr, cnoOfPage, cpage); + c_page_pool.getPtr(pagePtr, realPageId); Uint32 pageState= pagePtr.p->page_state; // skip empty page if (pageState == ZEMPTY_MM) { @@ -504,43 +529,19 @@ Dbtup::buildIndex(Signal* signal, Uint32 buildPtrI) // get tuple Uint32 pageIndex = ~0; const Tuple_header* tuple_ptr = 0; - if (! buildPtr.p->m_build_vs) { - Uint32 tupheadsize= tablePtr.p->m_offsets[MM].m_fix_header_size; - pageIndex = buildPtr.p->m_tupleNo * tupheadsize; - if (pageIndex + tupheadsize > Fix_page::DATA_WORDS) { - ljam(); - buildPtr.p->m_pageId++; - buildPtr.p->m_tupleNo= firstTupleNo; - break; - } - tuple_ptr = (Tuple_header*)&pagePtr.p->m_data[pageIndex]; - // skip over free tuple - if (tuple_ptr->m_header_bits & Tuple_header::FREE) { - ljam(); - buildPtr.p->m_tupleNo++; - break; - } - } else { - pageIndex = buildPtr.p->m_tupleNo; - Var_page* page_ptr = (Var_page*)pagePtr.p; - if (pageIndex >= page_ptr->high_index) { - ljam(); - buildPtr.p->m_pageId++; - buildPtr.p->m_tupleNo= firstTupleNo; - break; - } - Uint32 len= page_ptr->get_entry_len(pageIndex); - if (len == 0) { - ljam(); - buildPtr.p->m_tupleNo++; - break; - } - if (len & Var_page::CHAIN) { - ljam(); - buildPtr.p->m_tupleNo++; - break; - } - tuple_ptr = (Tuple_header*)page_ptr->get_ptr(pageIndex); + pageIndex = buildPtr.p->m_tupleNo * tupheadsize; + if (pageIndex + tupheadsize > Fix_page::DATA_WORDS) { + ljam(); + buildPtr.p->m_pageId++; + buildPtr.p->m_tupleNo= firstTupleNo; + break; + } + tuple_ptr = (Tuple_header*)&pagePtr.p->m_data[pageIndex]; + // skip over free tuple + if (tuple_ptr->m_header_bits & Tuple_header::FREE) { + ljam(); + buildPtr.p->m_tupleNo++; + break; } Uint32 tupVersion= tuple_ptr->get_tuple_version(); OperationrecPtr 
pageOperPtr; diff --git a/storage/ndb/src/kernel/blocks/dbtup/DbtupMeta.cpp b/storage/ndb/src/kernel/blocks/dbtup/DbtupMeta.cpp index 9ccff146d17..643863b31a1 100644 --- a/storage/ndb/src/kernel/blocks/dbtup/DbtupMeta.cpp +++ b/storage/ndb/src/kernel/blocks/dbtup/DbtupMeta.cpp @@ -61,6 +61,7 @@ void Dbtup::execTUPFRAGREQ(Signal* signal) //Uint32 noOfNewAttr= (signal->theData[10] & 0xFFFF); /* DICT sends number of character sets in upper half */ Uint32 noOfCharsets= (signal->theData[10] >> 16); + Uint32 gcpIndicator = signal->theData[13]; Uint32 tablespace= signal->theData[14]; Uint32 checksumIndicator= signal->theData[11]; @@ -133,12 +134,6 @@ void Dbtup::execTUPFRAGREQ(Signal* signal) return; } - regFragPtr.p->emptyPrimPage= RNIL; - regFragPtr.p->thFreeFirst= RNIL; - regFragPtr.p->free_var_page_array[0]= RNIL; - regFragPtr.p->free_var_page_array[1]= RNIL; - regFragPtr.p->free_var_page_array[2]= RNIL; - regFragPtr.p->free_var_page_array[3]= RNIL; regFragPtr.p->fragTableId= regTabPtr.i; regFragPtr.p->fragmentId= fragId; regFragPtr.p->m_tablespace_id= tablespace; @@ -181,7 +176,9 @@ void Dbtup::execTUPFRAGREQ(Signal* signal) //----------------------------------------------------------------------------- fragOperPtr.p->definingFragment= true; regTabPtr.p->tableStatus= DEFINING; - regTabPtr.p->checksumIndicator= (checksumIndicator != 0 ? true : false); + regTabPtr.p->m_bits = 0; + regTabPtr.p->m_bits |= (checksumIndicator ? Tablerec::TR_Checksum : 0); + regTabPtr.p->m_bits |= (gcpIndicator ? 
Tablerec::TR_RowGCI : 0); regTabPtr.p->m_offsets[MM].m_disk_ref_offset= 0; regTabPtr.p->m_offsets[MM].m_null_words= 0; @@ -443,11 +440,17 @@ void Dbtup::execTUP_ADD_ATTRREQ(Signal* signal) * Fix offsets */ Uint32 pos[2] = { 0, 0 }; - if(regTabPtr.p->checksumIndicator) + if (regTabPtr.p->m_bits & Tablerec::TR_Checksum) { pos[0]= 1; } + if (regTabPtr.p->m_bits & Tablerec::TR_RowGCI) + { + pos[MM]++; + pos[DD]++; + } + regTabPtr.p->m_no_of_disk_attributes= regTabPtr.p->m_attributes[DD].m_no_of_fixsize + regTabPtr.p->m_attributes[DD].m_no_of_varsize; diff --git a/storage/ndb/src/kernel/blocks/dbtup/DbtupPagMan.cpp b/storage/ndb/src/kernel/blocks/dbtup/DbtupPagMan.cpp index d95dd7b9eb0..487c55c0d49 100644 --- a/storage/ndb/src/kernel/blocks/dbtup/DbtupPagMan.cpp +++ b/storage/ndb/src/kernel/blocks/dbtup/DbtupPagMan.cpp @@ -121,10 +121,10 @@ void Dbtup::initializePage() cfreepageList[i] = RNIL; }//for PagePtr pagePtr; - for (pagePtr.i = 0; pagePtr.i < cnoOfPage; pagePtr.i++) { + for (pagePtr.i = 0; pagePtr.i < c_page_pool.getSize(); pagePtr.i++) { ljam(); refresh_watch_dog(); - ptrAss(pagePtr, cpage); + c_page_pool.getPtr(pagePtr); pagePtr.p->physical_page_id= RNIL; pagePtr.p->next_page = pagePtr.i + 1; pagePtr.p->first_cluster_page = RNIL; @@ -133,24 +133,20 @@ void Dbtup::initializePage() pagePtr.p->prev_page = RNIL; pagePtr.p->page_state = ZFREE_COMMON; }//for - pagePtr.i = cnoOfPage - 1; - ptrAss(pagePtr, cpage); pagePtr.p->next_page = RNIL; + /** + * Page 0 cant be part of buddy as + * it will scan left right when searching for bigger blocks, + * if 0 is part of arrat, it can search to -1...which is not good + */ pagePtr.i = 0; - ptrAss(pagePtr, cpage); + c_page_pool.getPtr(pagePtr); pagePtr.p->page_state = ~ZFREE_COMMON; - for(size_t j = 0; j<MAX_PARALLELL_TUP_SRREQ; j++){ - pagePtr.i = 1+j; - ptrAss(pagePtr, cpage); - pagePtr.p->page_state = ~ZFREE_COMMON; - } - - Uint32 tmp = 1 + MAX_PARALLELL_TUP_SRREQ; - returnCommonArea(tmp, cnoOfPage - tmp); + Uint32 tmp = 1; 
+ returnCommonArea(tmp, c_page_pool.getSize() - tmp); cnoOfAllocatedPages = tmp; // Is updated by returnCommonArea - c_sr_free_page_0 = ~0; }//Dbtup::initializePage() void Dbtup::allocConsPages(Uint32 noOfPagesToAllocate, @@ -234,7 +230,7 @@ void Dbtup::findFreeLeftNeighbours(Uint32& allocPageRef, while (allocPageRef > 0) { ljam(); pageLastPtr.i = allocPageRef - 1; - ptrCheckGuard(pageLastPtr, cnoOfPage, cpage); + c_page_pool.getPtr(pageLastPtr); if (pageLastPtr.p->page_state != ZFREE_COMMON) { ljam(); return; @@ -272,10 +268,10 @@ void Dbtup::findFreeRightNeighbours(Uint32& allocPageRef, ljam(); return; }//if - while ((allocPageRef + noPagesAllocated) < cnoOfPage) { + while ((allocPageRef + noPagesAllocated) < c_page_pool.getSize()) { ljam(); pageFirstPtr.i = allocPageRef + noPagesAllocated; - ptrCheckGuard(pageFirstPtr, cnoOfPage, cpage); + c_page_pool.getPtr(pageFirstPtr); if (pageFirstPtr.p->page_state != ZFREE_COMMON) { ljam(); return; @@ -307,8 +303,7 @@ void Dbtup::insertCommonArea(Uint32 insPageRef, Uint32 insList) cnoOfAllocatedPages -= (1 << insList); PagePtr pageLastPtr, pageInsPtr; - pageInsPtr.i = insPageRef; - ptrCheckGuard(pageInsPtr, cnoOfPage, cpage); + c_page_pool.getPtr(pageInsPtr, insPageRef); ndbrequire(insList < 16); pageLastPtr.i = (pageInsPtr.i + (1 << insList)) - 1; @@ -316,8 +311,8 @@ void Dbtup::insertCommonArea(Uint32 insPageRef, Uint32 insList) pageInsPtr.p->prev_cluster_page = RNIL; pageInsPtr.p->last_cluster_page = pageLastPtr.i; cfreepageList[insList] = pageInsPtr.i; - - ptrCheckGuard(pageLastPtr, cnoOfPage, cpage); + + c_page_pool.getPtr(pageLastPtr); pageLastPtr.p->first_cluster_page = pageInsPtr.i; pageLastPtr.p->next_page = RNIL; }//Dbtup::insertCommonArea() @@ -327,8 +322,7 @@ void Dbtup::removeCommonArea(Uint32 remPageRef, Uint32 list) cnoOfAllocatedPages += (1 << list); PagePtr pagePrevPtr, pageNextPtr, pageLastPtr, pageSearchPtr, remPagePtr; - remPagePtr.i = remPageRef; - ptrCheckGuard(remPagePtr, cnoOfPage, cpage); + 
c_page_pool.getPtr(remPagePtr, remPageRef); ndbrequire(list < 16); if (cfreepageList[list] == remPagePtr.i) { ljam(); @@ -336,14 +330,14 @@ void Dbtup::removeCommonArea(Uint32 remPageRef, Uint32 list) pageNextPtr.i = cfreepageList[list]; if (pageNextPtr.i != RNIL) { ljam(); - ptrCheckGuard(pageNextPtr, cnoOfPage, cpage); + c_page_pool.getPtr(pageNextPtr); pageNextPtr.p->prev_cluster_page = RNIL; }//if } else { pageSearchPtr.i = cfreepageList[list]; while (true) { ljam(); - ptrCheckGuard(pageSearchPtr, cnoOfPage, cpage); + c_page_pool.getPtr(pageSearchPtr); pagePrevPtr = pageSearchPtr; pageSearchPtr.i = pageSearchPtr.p->next_cluster_page; if (pageSearchPtr.i == remPagePtr.i) { @@ -355,7 +349,7 @@ void Dbtup::removeCommonArea(Uint32 remPageRef, Uint32 list) pagePrevPtr.p->next_cluster_page = pageNextPtr.i; if (pageNextPtr.i != RNIL) { ljam(); - ptrCheckGuard(pageNextPtr, cnoOfPage, cpage); + c_page_pool.getPtr(pageNextPtr); pageNextPtr.p->prev_cluster_page = pagePrevPtr.i; }//if }//if @@ -364,6 +358,6 @@ void Dbtup::removeCommonArea(Uint32 remPageRef, Uint32 list) remPagePtr.p->prev_cluster_page= RNIL; pageLastPtr.i = (remPagePtr.i + (1 << list)) - 1; - ptrCheckGuard(pageLastPtr, cnoOfPage, cpage); + c_page_pool.getPtr(pageLastPtr); pageLastPtr.p->first_cluster_page= RNIL; }//Dbtup::removeCommonArea() diff --git a/storage/ndb/src/kernel/blocks/dbtup/DbtupPageMap.cpp b/storage/ndb/src/kernel/blocks/dbtup/DbtupPageMap.cpp index 5b3e3eb273d..c8c9440e456 100644 --- a/storage/ndb/src/kernel/blocks/dbtup/DbtupPageMap.cpp +++ b/storage/ndb/src/kernel/blocks/dbtup/DbtupPageMap.cpp @@ -91,20 +91,20 @@ Uint32 Dbtup::getEmptyPage(Fragrecord* const regFragPtr) { - Uint32 pageId = regFragPtr->emptyPrimPage; + Uint32 pageId = regFragPtr->emptyPrimPage.firstItem; if (pageId == RNIL) { ljam(); allocMoreFragPages(regFragPtr); - pageId = regFragPtr->emptyPrimPage; + pageId = regFragPtr->emptyPrimPage.firstItem; if (pageId == RNIL) { ljam(); return RNIL; }//if }//if PagePtr pagePtr; - 
pagePtr.i = pageId; - ptrCheckGuard(pagePtr, cnoOfPage, cpage); - regFragPtr->emptyPrimPage = pagePtr.p->next_page; + LocalDLList<Page> alloc_pages(c_page_pool, regFragPtr->emptyPrimPage); + alloc_pages.getPtr(pagePtr, pageId); + alloc_pages.remove(pagePtr); return pageId; }//Dbtup::getEmptyPage() @@ -284,6 +284,22 @@ void Dbtup::releaseFragPages(Fragrecord* const regFragPtr) ljam(); ndbrequire(regPRPtr.i == regFragPtr->rootPageRange); initFragRange(regFragPtr); + for (Uint32 i = 0; i<MAX_FREE_LIST; i++) + { + LocalDLList<Page> tmp(c_page_pool, regFragPtr->free_var_page_array[i]); + tmp.remove(); + } + + { + LocalDLList<Page> tmp(c_page_pool, regFragPtr->emptyPrimPage); + tmp.remove(); + } + + { + LocalDLList<Page> tmp(c_page_pool, regFragPtr->thFreeFirst); + tmp.remove(); + } + return; } else { if (regPRPtr.p->type[indexPos] == ZNON_LEAF) { @@ -327,7 +343,6 @@ void Dbtup::initializePageRange() void Dbtup::initFragRange(Fragrecord* const regFragPtr) { - regFragPtr->emptyPrimPage = RNIL; regFragPtr->rootPageRange = RNIL; regFragPtr->currentPageRange = RNIL; regFragPtr->noOfPages = 0; @@ -365,19 +380,32 @@ Uint32 Dbtup::allocFragPages(Fragrecord* const regFragPtr, Uint32 tafpNoAllocReq /* THOSE PAGES TO EMPTY_MM AND LINK THEM INTO THE EMPTY */ /* PAGE LIST OF THE FRAGMENT. 
*/ /* ---------------------------------------------------------------- */ + Uint32 prev = RNIL; for (loopPagePtr.i = retPageRef; loopPagePtr.i < loopLimit; loopPagePtr.i++) { ljam(); - ptrCheckGuard(loopPagePtr, cnoOfPage, cpage); + c_page_pool.getPtr(loopPagePtr); loopPagePtr.p->page_state = ZEMPTY_MM; loopPagePtr.p->frag_page_id = startRange + (loopPagePtr.i - retPageRef); loopPagePtr.p->physical_page_id = loopPagePtr.i; - loopPagePtr.p->next_page = loopPagePtr.i + 1; + loopPagePtr.p->nextList = loopPagePtr.i + 1; + loopPagePtr.p->prevList = prev; + prev = loopPagePtr.i; }//for - loopPagePtr.i = (retPageRef + noOfPagesAllocated) - 1; - ptrCheckGuard(loopPagePtr, cnoOfPage, cpage); - loopPagePtr.p->next_page = regFragPtr->emptyPrimPage; - regFragPtr->emptyPrimPage = retPageRef; + loopPagePtr.i--; + ndbassert(loopPagePtr.p == c_page_pool.getPtr(loopPagePtr.i)); + loopPagePtr.p->nextList = RNIL; + + LocalDLList<Page> alloc(c_page_pool, regFragPtr->emptyPrimPage); + if (noOfPagesAllocated > 1) + { + alloc.add(retPageRef, loopPagePtr); + } + else + { + alloc.add(loopPagePtr); + } + /* ---------------------------------------------------------------- */ /* WAS ENOUGH PAGES ALLOCATED OR ARE MORE NEEDED. 
*/ /* ---------------------------------------------------------------- */ diff --git a/storage/ndb/src/kernel/blocks/dbtup/DbtupRoutines.cpp b/storage/ndb/src/kernel/blocks/dbtup/DbtupRoutines.cpp index cd9307321e1..fcb8c8aaa34 100644 --- a/storage/ndb/src/kernel/blocks/dbtup/DbtupRoutines.cpp +++ b/storage/ndb/src/kernel/blocks/dbtup/DbtupRoutines.cpp @@ -1178,6 +1178,18 @@ Dbtup::read_pseudo(Uint32 attrId, outBuffer[2] = signal->theData[2]; outBuffer[3] = signal->theData[3]; return 4; + case AttributeHeader::ROWID: + outBuffer[0] = req_struct->frag_page_id; + outBuffer[1] = operPtr.p->m_tuple_location.m_page_idx; + return 2; + case AttributeHeader::ROW_GCI: + if (tabptr.p->m_bits & Tablerec::TR_RowGCI) + { + Uint64 tmp = * req_struct->m_tuple_ptr->get_mm_gci(tabptr.p); + memcpy(outBuffer, &tmp, sizeof(tmp)); + return 2; + } + return 0; default: return 0; } diff --git a/storage/ndb/src/kernel/blocks/dbtup/DbtupScan.cpp b/storage/ndb/src/kernel/blocks/dbtup/DbtupScan.cpp index b5819ce268c..177d5c7cc08 100644 --- a/storage/ndb/src/kernel/blocks/dbtup/DbtupScan.cpp +++ b/storage/ndb/src/kernel/blocks/dbtup/DbtupScan.cpp @@ -18,12 +18,20 @@ #include "Dbtup.hpp" #include <signaldata/AccScan.hpp> #include <signaldata/NextScan.hpp> +#include <signaldata/AccLock.hpp> +#include <md5_hash.hpp> #undef jam #undef jamEntry #define jam() { jamLine(32000 + __LINE__); } #define jamEntry() { jamEntryLine(32000 + __LINE__); } +#ifdef VM_TRACE +#define dbg(x) globalSignalLoggers.log x +#else +#define dbg(x) +#endif + void Dbtup::execACC_SCANREQ(Signal* signal) { @@ -33,7 +41,7 @@ Dbtup::execACC_SCANREQ(Signal* signal) ScanOpPtr scanPtr; scanPtr.i = RNIL; do { - // find table and fragments + // find table and fragment TablerecPtr tablePtr; tablePtr.i = req->tableId; ptrCheckGuard(tablePtr, cnoOfTablerec, tablerec); @@ -43,49 +51,49 @@ Dbtup::execACC_SCANREQ(Signal* signal) getFragmentrec(fragPtr, fragId, tablePtr.p); ndbrequire(fragPtr.i != RNIL); Fragrecord& frag = *fragPtr.p; - // 
seize from pool and link to per-fragment list - - Uint32 bits= 0; - if(frag.m_lcp_scan_op != RNIL) - { - bits |= ScanOp::SCAN_LCP; - ndbrequire(frag.m_lcp_scan_op == c_lcp_scan_op); - c_scanOpPool.getPtr(scanPtr, frag.m_lcp_scan_op); - } - else - { + // flags + Uint32 bits = 0; + if (frag.m_lcp_scan_op == RNIL) { + // seize from pool and link to per-fragment list LocalDLList<ScanOp> list(c_scanOpPool, frag.m_scanList); - if (! list.seize(scanPtr)) - { + if (! list.seize(scanPtr)) { jam(); break; } - } - new (scanPtr.p) ScanOp(); - ScanOp& scan = *scanPtr.p; - scan.m_state = ScanOp::First; - // TODO scan disk only if any scanned attribute on disk - - if(! (bits & ScanOp::SCAN_LCP)) - { - /** - * Remove this until disk scan has been implemented - */ - if(tablePtr.p->m_attributes[DD].m_no_of_fixsize > 0 || - tablePtr.p->m_attributes[DD].m_no_of_varsize > 0) + if (!AccScanReq::getNoDiskScanFlag(req->requestInfo) + && tablePtr.p->m_no_of_disk_attributes) { - bits |= ScanOp::SCAN_DD; + bits |= ScanOp::SCAN_DD; + } + bool mm = (bits & ScanOp::SCAN_DD); + if (tablePtr.p->m_attributes[mm].m_no_of_varsize > 0) { + bits |= ScanOp::SCAN_VS; - if (tablePtr.p->m_attributes[DD].m_no_of_varsize > 0) - bits |= ScanOp::SCAN_DD_VS; + // disk pages have fixed page format + ndbrequire(! (bits & ScanOp::SCAN_DD)); + } + if (! AccScanReq::getReadCommittedFlag(req->requestInfo)) { + if (AccScanReq::getLockMode(req->requestInfo) == 0) + bits |= ScanOp::SCAN_LOCK_SH; + else + bits |= ScanOp::SCAN_LOCK_EX; + } + } else { + jam(); + ndbrequire(frag.m_lcp_scan_op == c_lcp_scan_op); + c_scanOpPool.getPtr(scanPtr, frag.m_lcp_scan_op); + bits |= ScanOp::SCAN_LCP; + if (tablePtr.p->m_attributes[MM].m_no_of_varsize > 0) { + bits |= ScanOp::SCAN_VS; } - } - - if(tablePtr.p->m_attributes[MM].m_no_of_varsize) - { - bits |= ScanOp::SCAN_VS; } + bits |= AccScanReq::getNRScanFlag(req->requestInfo) ? 
ScanOp::SCAN_NR : 0; + + // set up scan op + new (scanPtr.p) ScanOp(); + ScanOp& scan = *scanPtr.p; + scan.m_state = ScanOp::First; scan.m_bits = bits; scan.m_userPtr = req->senderData; scan.m_userRef = req->senderRef; @@ -94,18 +102,18 @@ Dbtup::execACC_SCANREQ(Signal* signal) scan.m_fragPtrI = fragPtr.i; scan.m_transId1 = req->transId1; scan.m_transId2 = req->transId2; + scan.m_savePointId = req->savePointId; + // conf AccScanConf* const conf = (AccScanConf*)signal->getDataPtrSend(); conf->scanPtr = req->senderData; conf->accPtr = scanPtr.i; conf->flag = AccScanConf::ZNOT_EMPTY_FRAGMENT; - sendSignal(req->senderRef, GSN_ACC_SCANCONF, signal, - AccScanConf::SignalLength, JBB); - + sendSignal(req->senderRef, GSN_ACC_SCANCONF, + signal, AccScanConf::SignalLength, JBB); return; } while (0); - if (scanPtr.i != RNIL) - { + if (scanPtr.i != RNIL) { jam(); releaseScanOp(scanPtr); } @@ -129,10 +137,21 @@ Dbtup::execNEXT_SCANREQ(Signal* signal) break; case NextScanReq::ZSCAN_NEXT_COMMIT: jam(); - break; case NextScanReq::ZSCAN_COMMIT: jam(); - { + if ((scan.m_bits & ScanOp::SCAN_LOCK) != 0) { + jam(); + AccLockReq* const lockReq = (AccLockReq*)signal->getDataPtrSend(); + lockReq->returnCode = RNIL; + lockReq->requestInfo = AccLockReq::Unlock; + lockReq->accOpPtr = req->accOperationPtr; + EXECUTE_DIRECT(DBACC, GSN_ACC_LOCKREQ, + signal, AccLockReq::UndoSignalLength); + jamEntry(); + ndbrequire(lockReq->returnCode == AccLockReq::Success); + removeAccLockOp(scan, req->accOperationPtr); + } + if (req->scanFlag == NextScanReq::ZSCAN_COMMIT) { NextScanConf* const conf = (NextScanConf*)signal->getDataPtrSend(); conf->scanPtr = scan.m_userPtr; unsigned signalLength = 1; @@ -143,6 +162,35 @@ Dbtup::execNEXT_SCANREQ(Signal* signal) break; case NextScanReq::ZSCAN_CLOSE: jam(); + if (scan.m_bits & ScanOp::SCAN_LOCK_WAIT) { + jam(); + ndbrequire(scan.m_accLockOp != RNIL); + // use ACC_ABORTCONF to flush out any reply in job buffer + AccLockReq* const lockReq = 
(AccLockReq*)signal->getDataPtrSend(); + lockReq->returnCode = RNIL; + lockReq->requestInfo = AccLockReq::AbortWithConf; + lockReq->accOpPtr = scan.m_accLockOp; + EXECUTE_DIRECT(DBACC, GSN_ACC_LOCKREQ, + signal, AccLockReq::UndoSignalLength); + jamEntry(); + ndbrequire(lockReq->returnCode == AccLockReq::Success); + scan.m_state = ScanOp::Aborting; + return; + } + if (scan.m_state == ScanOp::Locked) { + jam(); + ndbrequire(scan.m_accLockOp != RNIL); + AccLockReq* const lockReq = (AccLockReq*)signal->getDataPtrSend(); + lockReq->returnCode = RNIL; + lockReq->requestInfo = AccLockReq::Unlock; + lockReq->accOpPtr = scan.m_accLockOp; + EXECUTE_DIRECT(DBACC, GSN_ACC_LOCKREQ, + signal, AccLockReq::UndoSignalLength); + jamEntry(); + ndbrequire(lockReq->returnCode == AccLockReq::Success); + scan.m_accLockOp = RNIL; + } + scan.m_state = ScanOp::Aborting; scanClose(signal, scanPtr); return; case NextScanReq::ZSCAN_NEXT_ABORT: @@ -169,6 +217,7 @@ Dbtup::execACC_CHECK_SCAN(Signal* signal) ScanOpPtr scanPtr; c_scanOpPool.getPtr(scanPtr, req->accPtr); ScanOp& scan = *scanPtr.p; + // fragment FragrecordPtr fragPtr; fragPtr.i = scan.m_fragPtrI; ptrCheckGuard(fragPtr, cnoOfFragrec, fragrecord); @@ -181,28 +230,162 @@ Dbtup::execACC_CHECK_SCAN(Signal* signal) jamEntry(); return; } + if (scan.m_bits & ScanOp::SCAN_LOCK_WAIT) { + jam(); + // LQH asks if we are waiting for lock and we tell it to ask again + NextScanConf* const conf = (NextScanConf*)signal->getDataPtrSend(); + conf->scanPtr = scan.m_userPtr; + conf->accOperationPtr = RNIL; // no tuple returned + conf->fragId = frag.fragmentId; + unsigned signalLength = 3; + // if TC has ordered scan close, it will be detected here + sendSignal(scan.m_userRef, GSN_NEXT_SCANCONF, + signal, signalLength, JBB); + return; // stop + } if (scan.m_state == ScanOp::First) { jam(); - scanFirst(signal, fragPtr.p, scanPtr); + scanFirst(signal, scanPtr); } if (scan.m_state == ScanOp::Next) { jam(); - scanNext(signal, fragPtr.p, scanPtr); + bool 
immediate = scanNext(signal, scanPtr); + if (! immediate) { + jam(); + // time-slicing via TUP or PGMAN + return; + } } + scanReply(signal, scanPtr); +} + +void +Dbtup::scanReply(Signal* signal, ScanOpPtr scanPtr) +{ + ScanOp& scan = *scanPtr.p; + FragrecordPtr fragPtr; + fragPtr.i = scan.m_fragPtrI; + ptrCheckGuard(fragPtr, cnoOfFragrec, fragrecord); + Fragrecord& frag = *fragPtr.p; + // for reading tuple key in Current state + Uint32* pkData = (Uint32*)c_dataBuffer; + unsigned pkSize = 0; + if (scan.m_state == ScanOp::Current) { + // found an entry to return + jam(); + ndbrequire(scan.m_accLockOp == RNIL); + if (scan.m_bits & ScanOp::SCAN_LOCK) { + jam(); + // read tuple key - use TUX routine + const ScanPos& pos = scan.m_scanPos; + const Local_key& key_mm = pos.m_key_mm; + int ret = tuxReadPk(fragPtr.i, pos.m_realpid_mm, key_mm.m_page_idx, + pkData, false); + ndbrequire(ret > 0); + pkSize = ret; + dbg((DBTUP, "PK size=%d data=%08x", pkSize, pkData[0])); + // get read lock or exclusive lock + AccLockReq* const lockReq = (AccLockReq*)signal->getDataPtrSend(); + lockReq->returnCode = RNIL; + lockReq->requestInfo = (scan.m_bits & ScanOp::SCAN_LOCK_SH) ? 
+ AccLockReq::LockShared : AccLockReq::LockExclusive; + lockReq->accOpPtr = RNIL; + lockReq->userPtr = scanPtr.i; + lockReq->userRef = reference(); + lockReq->tableId = scan.m_tableId; + lockReq->fragId = frag.fragmentId; + lockReq->fragPtrI = RNIL; // no cached frag ptr yet + lockReq->hashValue = md5_hash((Uint64*)pkData, pkSize); + lockReq->tupAddr = key_mm.ref(); + lockReq->transId1 = scan.m_transId1; + lockReq->transId2 = scan.m_transId2; + EXECUTE_DIRECT(DBACC, GSN_ACC_LOCKREQ, + signal, AccLockReq::LockSignalLength); + jamEntry(); + switch (lockReq->returnCode) { + case AccLockReq::Success: + jam(); + scan.m_state = ScanOp::Locked; + scan.m_accLockOp = lockReq->accOpPtr; + break; + case AccLockReq::IsBlocked: + jam(); + // normal lock wait + scan.m_state = ScanOp::Blocked; + scan.m_bits |= ScanOp::SCAN_LOCK_WAIT; + scan.m_accLockOp = lockReq->accOpPtr; + // LQH will wake us up + signal->theData[0] = scan.m_userPtr; + signal->theData[1] = true; + EXECUTE_DIRECT(DBLQH, GSN_CHECK_LCP_STOP, signal, 2); + jamEntry(); + return; + break; + case AccLockReq::Refused: + jam(); + // we cannot see deleted tuple (assert only) + ndbassert(false); + // skip it + scan.m_state = ScanOp::Next; + signal->theData[0] = scan.m_userPtr; + signal->theData[1] = true; + EXECUTE_DIRECT(DBLQH, GSN_CHECK_LCP_STOP, signal, 2); + jamEntry(); + return; + break; + case AccLockReq::NoFreeOp: + jam(); + // max ops should depend on max scans (assert only) + ndbassert(false); + // stay in Current state + scan.m_state = ScanOp::Current; + signal->theData[0] = scan.m_userPtr; + signal->theData[1] = true; + EXECUTE_DIRECT(DBLQH, GSN_CHECK_LCP_STOP, signal, 2); + jamEntry(); + return; + break; + default: + ndbrequire(false); + break; + } + } else { + scan.m_state = ScanOp::Locked; + } + } + if (scan.m_state == ScanOp::Locked) { + // we have lock or do not need one jam(); - const PagePos& pos = scan.m_scanPos; + // conf signal NextScanConf* const conf = (NextScanConf*)signal->getDataPtrSend(); 
conf->scanPtr = scan.m_userPtr; - conf->accOperationPtr = (Uint32)-1; // no lock returned + // the lock is passed to LQH + Uint32 accLockOp = scan.m_accLockOp; + if (accLockOp != RNIL) { + scan.m_accLockOp = RNIL; + // remember it until LQH unlocks it + addAccLockOp(scan, accLockOp); + } else { + ndbrequire(! (scan.m_bits & ScanOp::SCAN_LOCK)); + // operation RNIL in LQH would signal no tuple returned + accLockOp = (Uint32)-1; + } + const ScanPos& pos = scan.m_scanPos; + conf->accOperationPtr = accLockOp; conf->fragId = frag.fragmentId; - conf->localKey[0] = pos.m_key.ref(); + conf->localKey[0] = pos.m_key_mm.ref(); conf->localKey[1] = 0; conf->localKeyLength = 1; unsigned signalLength = 6; - Uint32 blockNo = refToBlock(scan.m_userRef); - EXECUTE_DIRECT(blockNo, GSN_NEXT_SCANCONF, signal, signalLength); - jamEntry(); + if (scan.m_bits & ScanOp::SCAN_LOCK) { + sendSignal(scan.m_userRef, GSN_NEXT_SCANCONF, + signal, signalLength, JBB); + } else { + Uint32 blockNo = refToBlock(scan.m_userRef); + EXECUTE_DIRECT(blockNo, GSN_NEXT_SCANCONF, signal, signalLength); + jamEntry(); + } // next time look for next entry scan.m_state = ScanOp::Next; return; @@ -222,122 +405,491 @@ Dbtup::execACC_CHECK_SCAN(Signal* signal) ndbrequire(false); } +/* + * Lock succeeded (after delay) in ACC. If the lock is for current + * entry, set state to Locked. If the lock is for an entry we were + * moved away from, simply unlock it. Finally, if we are closing the + * scan, do nothing since we have already sent an abort request. + */ void -Dbtup::scanFirst(Signal*, Fragrecord* fragPtrP, ScanOpPtr scanPtr) +Dbtup::execACCKEYCONF(Signal* signal) { + jamEntry(); + ScanOpPtr scanPtr; + scanPtr.i = signal->theData[0]; + c_scanOpPool.getPtr(scanPtr); ScanOp& scan = *scanPtr.p; - // set to first fragment, first page, first tuple - const Uint32 first_page_idx = scan.m_bits & ScanOp::SCAN_VS ? 
1 : 0; - PagePos& pos = scan.m_scanPos; - pos.m_key.m_page_no = 0; - pos.m_key.m_page_idx = first_page_idx; - // just before - pos.m_match = false; - // let scanNext() do the work - scan.m_state = ScanOp::Next; + ndbrequire(scan.m_bits & ScanOp::SCAN_LOCK_WAIT && scan.m_accLockOp != RNIL); + scan.m_bits &= ~ ScanOp::SCAN_LOCK_WAIT; + if (scan.m_state == ScanOp::Blocked) { + // the lock wait was for current entry + jam(); + scan.m_state = ScanOp::Locked; + // LQH has the ball + return; + } + if (scan.m_state != ScanOp::Aborting) { + // we were moved, release lock + jam(); + AccLockReq* const lockReq = (AccLockReq*)signal->getDataPtrSend(); + lockReq->returnCode = RNIL; + lockReq->requestInfo = AccLockReq::Unlock; + lockReq->accOpPtr = scan.m_accLockOp; + EXECUTE_DIRECT(DBACC, GSN_ACC_LOCKREQ, signal, AccLockReq::UndoSignalLength); + jamEntry(); + ndbrequire(lockReq->returnCode == AccLockReq::Success); + scan.m_accLockOp = RNIL; + // LQH has the ball + return; + } + // lose the lock + scan.m_accLockOp = RNIL; + // continue at ACC_ABORTCONF +} - if (scan.m_bits & ScanOp::SCAN_DD) - { - pos.m_extent_info_ptr_i = - fragPtrP->m_disk_alloc_info.m_extent_list.firstItem; +/* + * Lock failed (after delay) in ACC. Probably means somebody ahead of + * us in lock queue deleted the tuple. 
+ */ +void +Dbtup::execACCKEYREF(Signal* signal) +{ + jamEntry(); + ScanOpPtr scanPtr; + scanPtr.i = signal->theData[0]; + c_scanOpPool.getPtr(scanPtr); + ScanOp& scan = *scanPtr.p; + ndbrequire(scan.m_bits & ScanOp::SCAN_LOCK_WAIT && scan.m_accLockOp != RNIL); + scan.m_bits &= ~ ScanOp::SCAN_LOCK_WAIT; + if (scan.m_state != ScanOp::Aborting) { + jam(); + // release the operation + AccLockReq* const lockReq = (AccLockReq*)signal->getDataPtrSend(); + lockReq->returnCode = RNIL; + lockReq->requestInfo = AccLockReq::Abort; + lockReq->accOpPtr = scan.m_accLockOp; + EXECUTE_DIRECT(DBACC, GSN_ACC_LOCKREQ, signal, AccLockReq::UndoSignalLength); + jamEntry(); + ndbrequire(lockReq->returnCode == AccLockReq::Success); + scan.m_accLockOp = RNIL; + // scan position should already have been moved (assert only) + if (scan.m_state == ScanOp::Blocked) { + jam(); + //ndbassert(false); + if (scan.m_bits & ScanOp::SCAN_NR) + { + jam(); + scan.m_state = ScanOp::Next; + scan.m_scanPos.m_get = ScanPos::Get_tuple; + ndbout_c("Ignoring scan.m_state == ScanOp::Blocked, refetch"); + } + else + { + jam(); + scan.m_state = ScanOp::Next; + ndbout_c("Ignoring scan.m_state == ScanOp::Blocked"); + } + } + // LQH has the ball + return; } + // lose the lock + scan.m_accLockOp = RNIL; + // continue at ACC_ABORTCONF } +/* + * Received when scan is closing. This signal arrives after any + * ACCKEYCON or ACCKEYREF which may have been in job buffer. 
+ */ void -Dbtup::scanNext(Signal* signal, Fragrecord* fragPtrP, ScanOpPtr scanPtr) +Dbtup::execACC_ABORTCONF(Signal* signal) { + jamEntry(); + ScanOpPtr scanPtr; + scanPtr.i = signal->theData[0]; + c_scanOpPool.getPtr(scanPtr); ScanOp& scan = *scanPtr.p; - PagePos& pos = scan.m_scanPos; - Uint32 bits = scan.m_bits; + ndbrequire(scan.m_state == ScanOp::Aborting); + // most likely we are still in lock wait + if (scan.m_bits & ScanOp::SCAN_LOCK_WAIT) { + jam(); + scan.m_bits &= ~ ScanOp::SCAN_LOCK_WAIT; + scan.m_accLockOp = RNIL; + } + scanClose(signal, scanPtr); +} + +void +Dbtup::scanFirst(Signal*, ScanOpPtr scanPtr) +{ + ScanOp& scan = *scanPtr.p; + ScanPos& pos = scan.m_scanPos; Local_key& key = pos.m_key; + const Uint32 bits = scan.m_bits; + // fragment + FragrecordPtr fragPtr; + fragPtr.i = scan.m_fragPtrI; + ptrCheckGuard(fragPtr, cnoOfFragrec, fragrecord); + Fragrecord& frag = *fragPtr.p; + // in the future should not pre-allocate pages + if (frag.noOfPages == 0) { + jam(); + scan.m_state = ScanOp::Last; + return; + } + if (! 
(bits & ScanOp::SCAN_DD)) { + key.m_file_no = ZNIL; + key.m_page_no = 0; + pos.m_get = ScanPos::Get_page_mm; + // for MM scan real page id is cached for efficiency + pos.m_realpid_mm = RNIL; + } else { + Disk_alloc_info& alloc = frag.m_disk_alloc_info; + // for now must check disk part explicitly + if (alloc.m_extent_list.firstItem == RNIL) { + jam(); + scan.m_state = ScanOp::Last; + return; + } + pos.m_extent_info_ptr_i = alloc.m_extent_list.firstItem; + Extent_info* ext = c_extent_pool.getPtr(pos.m_extent_info_ptr_i); + key.m_file_no = ext->m_key.m_file_no; + key.m_page_no = ext->m_first_page_no; + pos.m_get = ScanPos::Get_page_dd; + } + key.m_page_idx = 0; + // let scanNext() do the work + scan.m_state = ScanOp::Next; +} + +bool +Dbtup::scanNext(Signal* signal, ScanOpPtr scanPtr) +{ + ScanOp& scan = *scanPtr.p; + ScanPos& pos = scan.m_scanPos; + Local_key& key = pos.m_key; + const Uint32 bits = scan.m_bits; + // table TablerecPtr tablePtr; tablePtr.i = scan.m_tableId; ptrCheckGuard(tablePtr, cnoOfTablerec, tablerec); - Fragrecord& frag = *fragPtrP; - const Uint32 first_page_idx = bits & ScanOp::SCAN_VS ? 1 : 0; + Tablerec& table = *tablePtr.p; + // fragment + FragrecordPtr fragPtr; + fragPtr.i = scan.m_fragPtrI; + ptrCheckGuard(fragPtr, cnoOfFragrec, fragrecord); + Fragrecord& frag = *fragPtr.p; + // tuple found + Tuple_header* th = 0; + Uint32 loop_count = 0; + Uint32 scanGCI = scanPtr.p->m_scanGCI; + Uint32 foundGCI; + + bool mm = (bits & ScanOp::SCAN_DD); + Uint32 size = table.m_offsets[mm].m_fix_header_size + + (bits & ScanOp::SCAN_VS ? 
Tuple_header::HeaderSize + 1: 0); while (true) { - // TODO time-slice here after X loops - jam(); - // get page - PagePtr pagePtr; - if (key.m_page_no >= frag.noOfPages) { + switch (pos.m_get) { + case ScanPos::Get_next_page: + // move to next page jam(); - scan.m_state = ScanOp::Last; - break; - } - Uint32 realPageId = getRealpid(fragPtrP, key.m_page_no); - pagePtr.i = realPageId; - ptrCheckGuard(pagePtr, cnoOfPage, cpage); - Uint32 pageState = pagePtr.p->page_state; - // skip empty page - if (pageState == ZEMPTY_MM) { + { + if (! (bits & ScanOp::SCAN_DD)) + pos.m_get = ScanPos::Get_next_page_mm; + else + pos.m_get = ScanPos::Get_next_page_dd; + } + continue; + case ScanPos::Get_page: + // get real page jam(); - key.m_page_no++; - key.m_page_idx = first_page_idx; - pos.m_match = false; + { + if (! (bits & ScanOp::SCAN_DD)) + pos.m_get = ScanPos::Get_page_mm; + else + pos.m_get = ScanPos::Get_page_dd; + } continue; - } - // get next tuple - const Tuple_header* th = 0; - if (! (bits & ScanOp::SCAN_VS)) { - Uint32 tupheadsize = tablePtr.p->m_offsets[MM].m_fix_header_size; - if (pos.m_match) - key.m_page_idx += tupheadsize; - pos.m_match = true; - if (key.m_page_idx + tupheadsize > Fix_page::DATA_WORDS) { - jam(); + case ScanPos::Get_next_page_mm: + // move to next logical TUP page + jam(); + { key.m_page_no++; - key.m_page_idx = first_page_idx; - pos.m_match = false; - continue; + if (key.m_page_no >= frag.noOfPages) { + jam(); + // no more pages, scan ends + pos.m_get = ScanPos::Get_undef; + scan.m_state = ScanOp::Last; + return true; + } + key.m_page_idx = 0; + pos.m_get = ScanPos::Get_page_mm; + // clear cached value + pos.m_realpid_mm = RNIL; } - th = (Tuple_header*)&pagePtr.p->m_data[key.m_page_idx]; - // skip over free tuple - if (th->m_header_bits & Tuple_header::FREE) { + /*FALLTHRU*/ + case ScanPos::Get_page_mm: + // get TUP real page + jam(); + { + if (pos.m_realpid_mm == RNIL) { jam(); - continue; + pos.m_realpid_mm = getRealpid(fragPtr.p, key.m_page_no); 
+ } + PagePtr pagePtr; + c_page_pool.getPtr(pagePtr, pos.m_realpid_mm); + + if (pagePtr.p->page_state == ZEMPTY_MM) { + // skip empty page + jam(); + pos.m_get = ScanPos::Get_next_page_mm; + break; // incr loop count + } + pos.m_page = pagePtr.p; + pos.m_get = ScanPos::Get_tuple; } - } else { - Var_page* page_ptr = (Var_page*)pagePtr.p; - if (pos.m_match) - key.m_page_idx += 1; - pos.m_match = true; - if (key.m_page_idx >= page_ptr->high_index) { - jam(); + continue; + case ScanPos::Get_next_page_dd: + // move to next disk page + jam(); + { + Disk_alloc_info& alloc = frag.m_disk_alloc_info; + LocalSLList<Extent_info, Extent_list_t> + list(c_extent_pool, alloc.m_extent_list); + Ptr<Extent_info> ext_ptr; + c_extent_pool.getPtr(ext_ptr, pos.m_extent_info_ptr_i); + Extent_info* ext = ext_ptr.p; key.m_page_no++; - key.m_page_idx = first_page_idx; - pos.m_match = false; - continue; + if (key.m_page_no >= ext->m_first_page_no + alloc.m_extent_size) { + // no more pages in this extent + jam(); + if (! list.next(ext_ptr)) { + // no more extents, scan ends + jam(); + pos.m_get = ScanPos::Get_undef; + scan.m_state = ScanOp::Last; + return true; + } else { + // move to next extent + jam(); + pos.m_extent_info_ptr_i = ext_ptr.i; + Extent_info* ext = c_extent_pool.getPtr(pos.m_extent_info_ptr_i); + key.m_file_no = ext->m_key.m_file_no; + key.m_page_no = ext->m_first_page_no; + } + } + key.m_page_idx = 0; + pos.m_get = ScanPos::Get_page_dd; } - - Uint32 len= page_ptr->get_entry_len(key.m_page_idx); - if (len == 0) + /*FALLTHRU*/ + case ScanPos::Get_page_dd: + // get global page in PGMAN cache + jam(); { - // skip empty slot or - jam(); - continue; + // check if page is un-allocated or empty + if (likely(! 
(bits & ScanOp::SCAN_NR))) + { + Tablespace_client tsman(signal, c_tsman, + frag.fragTableId, + frag.fragmentId, + frag.m_tablespace_id); + unsigned bits = ~(unsigned)0; + int ret = tsman.get_page_free_bits(&key, &bits); + ndbrequire(ret == 0); + if (bits == 0) { + // skip empty page + jam(); + pos.m_get = ScanPos::Get_next_page_dd; + break; // incr loop count + } + } + // page request to PGMAN + Page_cache_client::Request preq; + preq.m_page = pos.m_key; + preq.m_callback.m_callbackData = scanPtr.i; + preq.m_callback.m_callbackFunction = + safe_cast(&Dbtup::disk_page_tup_scan_callback); + int flags = Page_cache_client::STRICT_ORDER; + int res = m_pgman.get_page(signal, preq, flags); + if (res == 0) { + jam(); + // request queued + pos.m_get = ScanPos::Get_tuple; + return false; + } + ndbrequire(res > 0); + pos.m_page = (Page*)m_pgman.m_ptr.p; } - if(len & Var_page::CHAIN) + pos.m_get = ScanPos::Get_tuple; + continue; + // get tuple + // move to next tuple + case ScanPos::Get_next_tuple: + case ScanPos::Get_next_tuple_fs: + // move to next fixed size tuple + jam(); { - // skip varpart chain - jam(); - continue; + key.m_page_idx += size; + pos.m_get = ScanPos::Get_tuple_fs; } - th = (Tuple_header*)page_ptr->get_ptr(key.m_page_idx); - } + /*FALLTHRU*/ + case ScanPos::Get_tuple: + case ScanPos::Get_tuple_fs: + // get fixed size tuple + jam(); + { + Fix_page* page = (Fix_page*)pos.m_page; + if (key.m_page_idx + size <= Fix_page::DATA_WORDS) + { + pos.m_get = ScanPos::Get_next_tuple_fs; + th = (Tuple_header*)&page->m_data[key.m_page_idx]; + if (likely(! (bits & ScanOp::SCAN_NR))) + { + if (! (th->m_header_bits & Tuple_header::FREE)) { + goto found_tuple; + } + else + { + jam(); + // skip free tuple + } + } + else + { + if ((foundGCI = *th->get_mm_gci(tablePtr.p)) > scanGCI) + { + if (! 
(th->m_header_bits & Tuple_header::FREE)) + { + jam(); + goto found_tuple; + } + else + { + goto found_deleted_rowid; + } + } + else + { + jam(); + // skip free tuple + } + } + } else { + jam(); + // no more tuples on this page + pos.m_get = ScanPos::Get_next_page; + } + } + break; // incr loop count + found_tuple: + // found possible tuple to return + jam(); + { + // caller has already set pos.m_get to next tuple + if (! (bits & ScanOp::SCAN_LCP && + th->m_header_bits & Tuple_header::LCP_SKIP)) { + Local_key& key_mm = pos.m_key_mm; + if (! (bits & ScanOp::SCAN_DD)) { + key_mm = pos.m_key; + // real page id is already set + } else { + key_mm.assref(th->m_base_record_ref); + // recompute for each disk tuple + pos.m_realpid_mm = getRealpid(fragPtr.p, key_mm.m_page_no); + } + // TUPKEYREQ handles savepoint stuff + scan.m_state = ScanOp::Current; + return true; + } else { + jam(); + // clear it so that it will show up in next LCP + th->m_header_bits &= ~(Uint32)Tuple_header::LCP_SKIP; + } + } + break; + found_deleted_rowid: + jam(); + { + ndbassert(bits & ScanOp::SCAN_NR); + Local_key& key_mm = pos.m_key_mm; + Fix_page* page = (Fix_page*)pos.m_page; + if (! (bits & ScanOp::SCAN_DD)) { + key_mm = pos.m_key; + // caller has already set pos.m_get to next tuple + // real page id is already set + } else { + key_mm.assref(th->m_base_record_ref); + // recompute for each disk tuple + pos.m_realpid_mm = getRealpid(fragPtr.p, key_mm.m_page_no); + + Fix_page *mmpage = (Fix_page*)c_page_pool.getPtr(pos.m_realpid_mm); + th = (Tuple_header*)(mmpage->m_data + key_mm.m_page_idx); + if ((foundGCI = *th->get_mm_gci(tablePtr.p)) > scanGCI) + { + if (! 
(th->m_header_bits & Tuple_header::FREE)) + break; + } + } + + NextScanConf* const conf = (NextScanConf*)signal->getDataPtrSend(); + conf->scanPtr = scan.m_userPtr; + conf->accOperationPtr = RNIL; + conf->fragId = frag.fragmentId; + conf->localKey[0] = pos.m_key_mm.ref(); + conf->localKey[1] = 0; + conf->localKeyLength = 1; + conf->gci = foundGCI; + Uint32 blockNo = refToBlock(scan.m_userRef); + EXECUTE_DIRECT(blockNo, GSN_NEXT_SCANCONF, signal, 7); + jamEntry(); - if(bits & ScanOp::SCAN_LCP && - th->m_header_bits & Tuple_header::LCP_SKIP) - { - /** - * Clear it so that it will show up in next LCP - */ - ((Tuple_header*)th)->m_header_bits &= ~(Uint32)Tuple_header::LCP_SKIP; - continue; + // TUPKEYREQ handles savepoint stuff + loop_count = 32; + scan.m_state = ScanOp::Next; + return false; + } + break; // incr loop count + default: + ndbrequire(false); + break; } - scan.m_state = ScanOp::Locked; - break; + if (++loop_count >= 32) + break; } + // TODO: at drop table we have to flush and terminate these + jam(); + signal->theData[0] = ZTUP_SCAN; + signal->theData[1] = scanPtr.i; + sendSignal(reference(), GSN_CONTINUEB, signal, 2, JBB); + return false; +} + +void +Dbtup::scanCont(Signal* signal, ScanOpPtr scanPtr) +{ + bool immediate = scanNext(signal, scanPtr); + if (! 
immediate) { + jam(); + // time-slicing again + return; + } + scanReply(signal, scanPtr); +} + +void +Dbtup::disk_page_tup_scan_callback(Signal* signal, Uint32 scanPtrI, Uint32 page_i) +{ + ScanOpPtr scanPtr; + c_scanOpPool.getPtr(scanPtr, scanPtrI); + ScanOp& scan = *scanPtr.p; + ScanPos& pos = scan.m_scanPos; + // get cache page + Ptr<GlobalPage> gptr; + m_global_page_pool.getPtr(gptr, page_i); + pos.m_page = (Page*)gptr.p; + // continue + scanCont(signal, scanPtr); } void @@ -350,11 +902,44 @@ Dbtup::scanClose(Signal* signal, ScanOpPtr scanPtr) unsigned signalLength = 3; sendSignal(scanPtr.p->m_userRef, GSN_NEXT_SCANCONF, signal, signalLength, JBB); - releaseScanOp(scanPtr); } void +Dbtup::addAccLockOp(ScanOp& scan, Uint32 accLockOp) +{ + LocalDLFifoList<ScanLock> list(c_scanLockPool, scan.m_accLockOps); + ScanLockPtr lockPtr; +#ifdef VM_TRACE + list.first(lockPtr); + while (lockPtr.i != RNIL) { + ndbrequire(lockPtr.p->m_accLockOp != accLockOp); + list.next(lockPtr); + } +#endif + bool ok = list.seize(lockPtr); + ndbrequire(ok); + lockPtr.p->m_accLockOp = accLockOp; +} + +void +Dbtup::removeAccLockOp(ScanOp& scan, Uint32 accLockOp) +{ + LocalDLFifoList<ScanLock> list(c_scanLockPool, scan.m_accLockOps); + ScanLockPtr lockPtr; + list.first(lockPtr); + while (lockPtr.i != RNIL) { + if (lockPtr.p->m_accLockOp == accLockOp) { + jam(); + break; + } + list.next(lockPtr); + } + ndbrequire(lockPtr.i != RNIL); + list.release(lockPtr); +} + +void Dbtup::releaseScanOp(ScanOpPtr& scanPtr) { FragrecordPtr fragPtr; @@ -396,8 +981,9 @@ Dbtup::execLCP_FRAG_ORD(Signal* signal) frag.m_lcp_scan_op = c_lcp_scan_op; ScanOpPtr scanPtr; c_scanOpPool.getPtr(scanPtr, frag.m_lcp_scan_op); - - scanFirst(signal, fragPtr.p, scanPtr); + //ndbrequire(scanPtr.p->m_fragPtrI == fragPtr.i); ? 
+ + scanFirst(signal, scanPtr); scanPtr.p->m_state = ScanOp::First; } } diff --git a/storage/ndb/src/kernel/blocks/dbtup/DbtupTrigger.cpp b/storage/ndb/src/kernel/blocks/dbtup/DbtupTrigger.cpp index 368a828425e..f35ccfb6019 100644 --- a/storage/ndb/src/kernel/blocks/dbtup/DbtupTrigger.cpp +++ b/storage/ndb/src/kernel/blocks/dbtup/DbtupTrigger.cpp @@ -580,7 +580,12 @@ Dbtup::fireDetachedTriggers(KeyReqStruct *req_struct, { regOperPtr->op_struct.op_type = ZUPDATE; } - + + /** + * Set disk page + */ + req_struct->m_disk_page_ptr.i = m_pgman.m_ptr.i; + ndbrequire(regOperPtr->is_first_operation()); triggerList.first(trigPtr); while (trigPtr.i != RNIL) { @@ -817,8 +822,8 @@ bool Dbtup::readTriggerInfo(TupTriggerData* const trigPtr, //-------------------------------------------------------------------- // Read Primary Key Values //-------------------------------------------------------------------- - if (regTabPtr->need_expand(false)) // no disk - prepare_read(req_struct, regTabPtr, false); // setup varsize + if (regTabPtr->need_expand()) + prepare_read(req_struct, regTabPtr, true); int ret = readAttributes(req_struct, &tableDescriptor[regTabPtr->readKeyArray].tabDescr, @@ -902,8 +907,8 @@ bool Dbtup::readTriggerInfo(TupTriggerData* const trigPtr, req_struct->m_tuple_ptr= (Tuple_header*)ptr; } - if (regTabPtr->need_expand(false)) // no disk - prepare_read(req_struct, regTabPtr, false); // setup varsize + if (regTabPtr->need_expand()) // no disk + prepare_read(req_struct, regTabPtr, true); int ret = readAttributes(req_struct, &readBuffer[0], @@ -1168,7 +1173,7 @@ Dbtup::executeTuxCommitTriggers(Signal* signal, req->pageIndex = regOperPtr->m_tuple_location.m_page_idx; req->tupVersion = tupVersion; req->opInfo = TuxMaintReq::OpRemove; - removeTuxEntries(signal, regOperPtr, regTabPtr); + removeTuxEntries(signal, regTabPtr); } void @@ -1200,12 +1205,11 @@ Dbtup::executeTuxAbortTriggers(Signal* signal, req->pageIndex = regOperPtr->m_tuple_location.m_page_idx; req->tupVersion = 
tupVersion; req->opInfo = TuxMaintReq::OpRemove; - removeTuxEntries(signal, regOperPtr, regTabPtr); + removeTuxEntries(signal, regTabPtr); } void Dbtup::removeTuxEntries(Signal* signal, - Operationrec* regOperPtr, Tablerec* regTabPtr) { TuxMaintReq* const req = (TuxMaintReq*)signal->getDataPtrSend(); diff --git a/storage/ndb/src/kernel/blocks/dbtup/DbtupVarAlloc.cpp b/storage/ndb/src/kernel/blocks/dbtup/DbtupVarAlloc.cpp index 8436c72993d..94bd75108a4 100644 --- a/storage/ndb/src/kernel/blocks/dbtup/DbtupVarAlloc.cpp +++ b/storage/ndb/src/kernel/blocks/dbtup/DbtupVarAlloc.cpp @@ -39,394 +39,6 @@ void Dbtup::init_list_sizes(void) c_max_list_size[4]= 199; } -#if 0 -void -Dbtup::free_separate_var_part(Fragrecord* const regFragPtr, - Tablerec* const regTabPtr, - Tuple_header* tuple_header) -{ - Uint32 page_ref, page_index; - PagePtr page_ptr; - page_ref= tuple_header->m_data[regTabPtr->var_offset]; - page_index= page_ref & MAX_TUPLES_PER_PAGE; - page_ptr.i= page_ref >> MAX_TUPLES_BITS; - ptrCheckGuard(page_ptr, cnoOfPage, cpage); - free_var_rec(regFragPtr, - regTabPtr, - (Var_page*)page_ptr.p, - page_index); -} - - -void -Dbtup::abort_separate_var_part(Uint32 var_page_ref, - const Uint32* copy_var_part, - Uint32 copy_var_size) -{ - Uint32 page_index; - PagePtr var_page_ptr; - page_index= var_page_ref & MAX_TUPLES_PER_PAGE; - var_page_ptr.i= var_page_ref >> MAX_TUPLES_BITS; - ptrCheckGuard(var_page_ptr, cnoOfPage, cpage); - Uint32 *ptr= ((Var_page*)var_page_ptr.p)->get_ptr(page_index); - MEMCOPY_NO_WORDS(ptr, copy_var_part, copy_var_size); -} - -void -Dbtup::shrink_entry(Fragrecord* const regFragPtr, - Var_page* const page_ptr, - Uint32 page_index, - Uint32 new_size) -{ - - page_ptr->shrink_entry(page_index, new_size); - update_free_page_list(regFragPtr, page_ptr); -} - -void -Dbtup::check_entry_size(KeyReqStruct* req_struct, - Operationrec* regOperPtr, - Fragrecord* const regFragPtr, - Tablerec* const regTabPtr) -{ -#if 0 - Uint32 vp_index, no_var_attr, total_var_size, 
add_size, new_size, entry_len; - Uint32 vp_offset, tuple_size, var_part_local; - Uint32 *var_data_part, *var_link; - PagePtr var_page_ptr; - Uint32* tuple_ptr= req_struct->m_tuple_ptr; - Uint32 page_index= regOperPtr->m_tuple_location.m_page_idx; - tuple_size= regTabPtr->tupheadsize; - no_var_attr= regTabPtr->no_var_attr; - var_part_local= get_var_part_local(* (tuple_ptr+1)); - add_size= regTabPtr->var_array_wsize; - var_link= tuple_ptr+tuple_size; - if (var_part_local == 1) { - ljam(); - var_data_part= var_link; - var_page_ptr.p= req_struct->fix_page_ptr.p; - add_size+= tuple_size; - vp_index= regOperPtr->m_tuple_location.m_page_idx; - } else { - ljam(); - entry_len= get_entry_len(req_struct->var_page_ptr, page_index); - if (entry_len > (tuple_size + 1)) { - ljam(); - shrink_entry(regFragPtr, - req_struct->fix_page_ptr, - page_index, - tuple_size + 1); - } else { - ndbassert(entry_len == (tuple_size + 1)); - } - set_up_var_page(*var_link, - regFragPtr, - var_page_ptr, - vp_index, - vp_offset); - var_data_part= &var_page_ptr.p->pageWord[vp_offset]; - } - total_var_size= calculate_total_var_size((uint16*)var_data_part, - no_var_attr); - new_size= total_var_size + add_size; - entry_len= get_entry_len(var_page_ptr.p, vp_index); - if (new_size < entry_len) { - ljam(); - shrink_entry(regFragPtr, - var_page_ptr.p, - vp_index, - new_size); - } else { - ndbassert(entry_len == new_size); - } -#endif -} - -inline -void -Dbtup::grow_entry(Fragrecord* const regFragPtr, - Var_page* page_header, - Uint32 page_index, - Uint32 growth_len) -{ - page_header->grow_entry(page_index, growth_len); - update_free_page_list(regFragPtr, page_header); -} - - -void -Dbtup::setup_varsize_part(KeyReqStruct* req_struct, - Operationrec* const regOperPtr, - Tablerec* const regTabPtr) -{ - Uint32 num_var_attr; - Uint32 var_data_wsize; - Uint32* var_data_ptr; - Uint32* var_data_start; - - Uint32 page_index= regOperPtr->m_tuple_location.m_page_idx; - if (regTabPtr->var_sized_record) { - ljam(); - 
num_var_attr= regTabPtr->no_var_attr; - if (!(req_struct->m_tuple_ptr->m_header_bits & Tuple_header::CHAINED_ROW)) - { - ljam(); - var_data_ptr= req_struct->m_tuple_ptr->m_data+regTabPtr->var_offset; - req_struct->var_page_ptr.i = req_struct->fix_page_ptr.i; - req_struct->var_page_ptr.p = (Var_page*)req_struct->fix_page_ptr.p; - req_struct->vp_index= page_index; - } else { - Uint32 var_link= req_struct->m_tuple_ptr->m_data[regTabPtr->var_offset]; - ljam(); - - Uint32 vp_index= var_link & MAX_TUPLES_PER_PAGE; - PagePtr var_page_ptr; - var_page_ptr.i= var_link >> MAX_TUPLES_BITS; - ptrCheckGuard(var_page_ptr, cnoOfPage, cpage); - - req_struct->vp_index= vp_index; - req_struct->var_page_ptr.i= var_page_ptr.i; - req_struct->var_page_ptr.p= (Var_page*)var_page_ptr.p; - - var_data_ptr= ((Var_page*)var_page_ptr.p)->get_ptr(vp_index); - req_struct->fix_var_together= false; - } - var_data_start= &var_data_ptr[regTabPtr->var_array_wsize]; - req_struct->var_len_array= (Uint16*)var_data_ptr; - req_struct->var_data_start= var_data_start; - var_data_wsize= init_var_pos_array(req_struct->var_len_array, - &req_struct->var_pos_array[0], - num_var_attr); - req_struct->var_data_end= &var_data_start[var_data_wsize]; - } -} - - -bool -Dbtup::compress_var_sized_part_after_update(KeyReqStruct *req_struct, - Operationrec* const regOperPtr, - Fragrecord* const regFragPtr, - Tablerec* const regTabPtr) -{ - Uint32 entry_len, old_var_len, new_size, total_size; - Uint32* used_var_data_start= req_struct->var_data_start; - total_size= calculate_total_var_size(req_struct->var_len_array, - regTabPtr->no_var_attr); - entry_len= req_struct->var_page_ptr.p->get_entry_len(req_struct->vp_index); - if (req_struct->fix_var_together) { - ljam(); - old_var_len= entry_len - - (regTabPtr->tupheadsize + regTabPtr->var_array_wsize); - } else { - ljam(); - old_var_len= entry_len - regTabPtr->var_array_wsize; - } - if (total_size > old_var_len) { - ljam(); - /** - * The new total size of the variable part is 
greater than it was before - * the update. We will need to increase the size of the record or split - * it into a fixed part and a variable part. - */ - if (! handle_growth_after_update(req_struct, - regFragPtr, - regTabPtr, - (total_size - old_var_len))) { - ljam(); - return false; - } - } else if (total_size < old_var_len) { - ljam(); - /** - * The new total size is smaller than what it was before we started. - * In one case we can shrink immediately and this is after an initial - * insert since we allocate in this case a full sized tuple and there - * is no problem in shrinking this already before committing. - * - * For all other cases we need to keep the space to ensure that we - * can safely abort (which means in this case to grow back to - * original size). Thus shrink cannot be done before commit occurs - * in those cases. - */ - if (regOperPtr->op_struct.op_type == ZINSERT && - regOperPtr->prevActiveOp == RNIL && - regOperPtr->nextActiveOp == RNIL) { - ljam(); - new_size= entry_len - (old_var_len - total_size); - shrink_entry(regFragPtr, - req_struct->var_page_ptr.p, - req_struct->vp_index, - new_size); - } - } - reset_req_struct_data(regTabPtr, - req_struct, - regOperPtr->m_tuple_location.m_page_idx); - copy_back_var_attr(req_struct, regTabPtr, used_var_data_start); - return true; -} - -void -Dbtup::reset_req_struct_data(Tablerec* const regTabPtr, - KeyReqStruct* req_struct, - Uint32 fix_index) -{ - Var_page *var_page_ptr, *fix_page_ptr; - Uint32 vp_index; - - fix_page_ptr= (Var_page*)req_struct->fix_page_ptr.p; - var_page_ptr= req_struct->var_page_ptr.p; - vp_index= req_struct->vp_index; - - req_struct->m_tuple_ptr= (Tuple_header*)fix_page_ptr->get_ptr(fix_index); - - Uint32 vp_len= var_page_ptr->get_entry_len(vp_index); - - Uint32 *var_ptr; - if (req_struct->fix_var_together) - { - ljam(); - var_ptr= req_struct->m_tuple_ptr->m_data+regTabPtr->var_offset; - } - else - { - var_ptr= var_page_ptr->get_ptr(vp_index); - } - - req_struct->var_len_array= 
(Uint16*)(var_ptr); - req_struct->var_data_start= var_ptr+regTabPtr->var_array_wsize; - req_struct->var_data_end= var_ptr+regTabPtr->var_array_wsize+vp_len; -} - -void -Dbtup::copy_back_var_attr(KeyReqStruct *req_struct, - Tablerec* const regTabPtr, - Uint32 *source_rec) -{ - Uint32 i, dest_index, vpos_index, byte_size, word_size, num_var_attr; - Uint32 *dest_rec, max_var_size, entry_len; - Uint32 total_word_size= 0; - -#ifdef VM_TRACE - entry_len= req_struct->var_page_ptr.p->get_entry_len(req_struct->vp_index); - if (req_struct->fix_var_together) { - ljam(); - max_var_size= entry_len - (regTabPtr->tupheadsize + - regTabPtr->var_array_wsize); - } else { - ljam(); - max_var_size= entry_len - regTabPtr->var_array_wsize; - } -#endif - dest_rec= req_struct->var_data_start; - num_var_attr= regTabPtr->no_var_attr; - ljam(); - for (i= 0; i < num_var_attr; i++) { - dest_index= total_word_size; - byte_size= req_struct->var_len_array[i]; - vpos_index= req_struct->var_pos_array[i]; - word_size= convert_byte_to_word_size(byte_size); - total_word_size+= word_size; - req_struct->var_pos_array[i]= total_word_size; - MEMCOPY_NO_WORDS(&dest_rec[vpos_index], - &source_rec[dest_index], - word_size); - ndbassert((vpos_index + word_size) <= max_var_size); - } - ndbassert(total_word_size <= max_var_size); - req_struct->var_pos_array[num_var_attr]= total_word_size; - req_struct->var_data_end= &req_struct->var_data_start[total_word_size]; -} - - -void -Dbtup::copy_out_var_attr(KeyReqStruct *req_struct, - Tablerec* const regTabPtr) -{ - Uint32 i, source_index, byte_size, vpos_index, word_size, last_pos_array; - Uint32 num_var_attr= regTabPtr->no_var_attr; - Uint16 copy_pos_array[MAX_ATTRIBUTES_IN_TABLE + 1]; - init_var_len_array(©_pos_array[0], regTabPtr); - init_var_pos_array(©_pos_array[0], - ©_pos_array[0], - regTabPtr->no_var_attr); - - Uint32 *source_rec= req_struct->var_data_start; - Uint32 *dest_rec= &ctemp_var_record[0]; - Uint32 total_word_size= 0; - ljam(); - for (i= 0; i < 
num_var_attr; i++) { - source_index= total_word_size; - byte_size= req_struct->var_len_array[i]; - vpos_index= copy_pos_array[i]; - word_size= convert_byte_to_word_size(byte_size); - total_word_size+= word_size; - req_struct->var_pos_array[i]= copy_pos_array[i]; - MEMCOPY_NO_WORDS(&dest_rec[source_index], - &source_rec[vpos_index], - word_size); - } - last_pos_array= copy_pos_array[num_var_attr]; - req_struct->var_data_start= dest_rec; - req_struct->var_data_end= &dest_rec[last_pos_array]; - req_struct->var_part_updated= true; - req_struct->var_pos_array[num_var_attr]= last_pos_array; -} - - -Uint32 -Dbtup::calculate_total_var_size(Uint16* var_len_array, - Uint32 num_var_attr) -{ - Uint32 i, byte_size, word_size, total_size; - total_size= 0; - for (i= 0; i < num_var_attr; i++) { - byte_size= var_len_array[i]; - word_size= convert_byte_to_word_size(byte_size); - total_size+= word_size; - } - return total_size; -} - -Uint32 -Dbtup::init_var_pos_array(Uint16* var_len_array, - Uint16* var_pos_array, - Uint32 num_var_attr) -{ - Uint32 i, real_len, word_len; - Uint32 curr_pos= 0; - for (i= 0, curr_pos= 0; i < num_var_attr; i++) { - real_len= var_len_array[i]; - var_pos_array[i]= curr_pos; - word_len= convert_byte_to_word_size(real_len); - curr_pos+= word_len; - } - var_pos_array[num_var_attr]= curr_pos; - return curr_pos; -} - -void -Dbtup::init_var_len_array(Uint16 *var_len_array, Tablerec *tab_ptr) -{ - Uint32 array_ind= 0; - Uint32 attr_descr, i; - Uint32 no_of_attr= tab_ptr->noOfAttr; - Uint32 descr_start= tab_ptr->tabDescriptor; - TableDescriptor *tab_descr= &tableDescriptor[descr_start]; - ndbrequire(descr_start + (no_of_attr << ZAD_LOG_SIZE) <= cnoOfTabDescrRec); - for (i= 0; i < no_of_attr; i++) { - attr_descr= tab_descr[i * ZAD_SIZE].tabDescr; - if (AttributeDescriptor::getArrayType(attr_descr) == 0) { - Uint32 bits_used= AttributeDescriptor::getArraySize(attr_descr) * - (1 << AttributeDescriptor::getSize(attr_descr)); - Uint32 no_attr_bytes= ((bits_used + 7) >> 
3); - var_len_array[array_ind++]= no_attr_bytes; - } - } -} - -#endif - /* Allocator for variable sized segments Part of the external interface for variable sized segments @@ -438,8 +50,8 @@ Dbtup::init_var_len_array(Uint16 *var_len_array, Tablerec *tab_ptr) and dropping attributes without the need to copy the entire table. SYNOPSIS - frag_ptr A pointer to the fragment description - tab_ptr A pointer to the table description + fragPtr A pointer to the fragment description + tabPtr A pointer to the table description alloc_size Size of the allocated record signal The signal object to be used if a signal needs to be sent @@ -451,44 +63,81 @@ Dbtup::init_var_len_array(Uint16 *var_len_array, Tablerec *tab_ptr) page_ptr The i and p value of the page where the record was allocated */ -Uint32* Dbtup::alloc_var_rec(Fragrecord* const frag_ptr, - Tablerec* const tab_ptr, +Uint32* Dbtup::alloc_var_rec(Fragrecord* fragPtr, + Tablerec* tabPtr, Uint32 alloc_size, Local_key* key, - Uint32 * out_frag_page_id, - Uint32 base) + Uint32 * out_frag_page_id) { - Var_page* page_header; - PagePtr page_ptr; - page_ptr.i= get_alloc_page(frag_ptr, (alloc_size + 1)); - if (page_ptr.i == RNIL) { + /** + * TODO alloc fix+var part + */ + tabPtr->m_offsets[MM].m_fix_header_size += Tuple_header::HeaderSize + 1; + Uint32 *ptr = alloc_fix_rec(fragPtr, tabPtr, key, out_frag_page_id); + tabPtr->m_offsets[MM].m_fix_header_size -= Tuple_header::HeaderSize + 1; + if (unlikely(ptr == 0)) + { + return 0; + } + + ndbassert(alloc_size >= tabPtr->m_offsets[MM].m_fix_header_size + + Tuple_header::HeaderSize); + + alloc_size -= tabPtr->m_offsets[MM].m_fix_header_size + + Tuple_header::HeaderSize; + + + Local_key varref; + if (likely(alloc_var_part(fragPtr, tabPtr, alloc_size, &varref) != 0)) + { + Tuple_header* tuple = (Tuple_header*)ptr; + * tuple->get_var_part_ptr(tabPtr) = varref.ref(); + return ptr; + } + + PagePtr pagePtr; + c_page_pool.getPtr(pagePtr, key->m_page_no); + free_fix_rec(fragPtr, tabPtr, key, 
(Fix_page*)pagePtr.p); + return 0; +} + +Uint32* +Dbtup::alloc_var_part(Fragrecord* fragPtr, + Tablerec* tabPtr, + Uint32 alloc_size, + Local_key* key) +{ + PagePtr pagePtr; + pagePtr.i= get_alloc_page(fragPtr, (alloc_size + 1)); + if (pagePtr.i == RNIL) { ljam(); - if ((page_ptr.i= getEmptyPage(frag_ptr)) == RNIL) { + if ((pagePtr.i= get_empty_var_page(fragPtr)) == RNIL) { ljam(); return 0; } - ptrCheckGuard(page_ptr, cnoOfPage, cpage); - page_header= (Var_page*)page_ptr.p; - page_header->init(); - insert_free_page(frag_ptr, page_header, MAX_FREE_LIST - 1); + c_page_pool.getPtr(pagePtr); + ((Var_page*)pagePtr.p)->init(); + pagePtr.p->list_index = MAX_FREE_LIST - 1; + LocalDLList<Page> list(c_page_pool, + fragPtr->free_var_page_array[MAX_FREE_LIST-1]); + list.add(pagePtr); /* * Tup scan and index build check ZEMPTY_MM to skip un-init()ed * page. Change state here. For varsize it means "page in use". */ - page_ptr.p->page_state = ZTH_MM_FREE; + pagePtr.p->page_state = ZTH_MM_FREE; } else { - ptrCheckGuard(page_ptr, cnoOfPage, cpage); + c_page_pool.getPtr(pagePtr); ljam(); - page_header= (Var_page*)page_ptr.p; } - Uint32 idx= page_header->alloc_record(alloc_size, - (Var_page*)ctemp_page, base); + Uint32 idx= ((Var_page*)pagePtr.p) + ->alloc_record(alloc_size, (Var_page*)ctemp_page, Var_page::CHAIN); + + key->m_page_no = pagePtr.i; + key->m_page_idx = idx; - key->m_page_no= page_ptr.i; - key->m_page_idx= idx; - *out_frag_page_id= page_header->frag_page_id; - update_free_page_list(frag_ptr, page_header); - return page_header->get_ptr(idx); + update_free_page_list(fragPtr, pagePtr); + return ((Var_page*)pagePtr.p)->get_ptr(idx); } /* @@ -496,8 +145,8 @@ Uint32* Dbtup::alloc_var_rec(Fragrecord* const frag_ptr, Part of the external interface for variable sized segments SYNOPSIS - frag_ptr A pointer to the fragment description - tab_ptr A pointer to the table description + fragPtr A pointer to the fragment description + tabPtr A pointer to the table description signal The 
signal object to be used if a signal needs to be sent page_ptr A reference to the page of the variable sized @@ -507,102 +156,59 @@ Uint32* Dbtup::alloc_var_rec(Fragrecord* const frag_ptr, RETURN VALUES Returns true if deallocation was successful otherwise false */ -void -Dbtup::free_var_part(Fragrecord* frag_ptr, Tablerec* tab_ptr, - Var_part_ref ref, Uint32 chain) +void Dbtup::free_var_rec(Fragrecord* fragPtr, + Tablerec* tabPtr, + Local_key* key, + Ptr<Page> pagePtr) { - Local_key tmp; - PagePtr pagePtr; - tmp.m_page_idx= ref.m_ref & MAX_TUPLES_PER_PAGE; - pagePtr.i= tmp.m_page_no= ref.m_ref >> MAX_TUPLES_BITS; - - ptrCheckGuard(pagePtr, cnoOfPage, cpage); - free_var_part(frag_ptr, tab_ptr, &tmp, (Var_page*)pagePtr.p, chain); -} - -void Dbtup::free_var_part(Fragrecord* const frag_ptr, - Tablerec* const tab_ptr, - Local_key* key, - Var_page* const page_header, - Uint32 chain) -{ - + /** + * TODO free fix + var part + */ Uint32 page_idx= key->m_page_idx; - page_header->free_record(page_idx, chain); + Uint32 *ptr = ((Fix_page*)pagePtr.p)->get_ptr(key->m_page_idx, 0); + Tuple_header* tuple = (Tuple_header*)ptr; + + Local_key ref; + ref.assref(* tuple->get_var_part_ptr(tabPtr)); + + free_fix_rec(fragPtr, tabPtr, key, (Fix_page*)pagePtr.p); + + c_page_pool.getPtr(pagePtr, ref.m_page_no); + ((Var_page*)pagePtr.p)->free_record(ref.m_page_idx, Var_page::CHAIN); - ndbassert(page_header->free_space <= Var_page::DATA_WORDS); - if (page_header->free_space == Var_page::DATA_WORDS - 1) + ndbassert(pagePtr.p->free_space <= Var_page::DATA_WORDS); + if (pagePtr.p->free_space == Var_page::DATA_WORDS - 1) { ljam(); /* - This code could be used when we release pages. - remove_free_page(signal,frag_ptr,page_header,page_header->list_index); - return_empty_page(frag_ptr, page_header); + This code could be used when we release pages. 
+ remove_free_page(signal,fragPtr,page_header,page_header->list_index); + return_empty_page(fragPtr, page_header); */ - update_free_page_list(frag_ptr, page_header); + update_free_page_list(fragPtr, pagePtr); } else { ljam(); - update_free_page_list(frag_ptr, page_header); + update_free_page_list(fragPtr, pagePtr); } return; } - -#if 0 -/* - This method is called whenever the variable part has been updated and - has grown beyond its original size. This means that more space needs to - be allocated to the record. If possible this space should be in the - same page but we might have to allocate more space in a new page. - In the case of a new page we must still keep the old page and the - page index since this is the entrance to the record. In this case the - record might have to be split into a fixed part and a variable part. - - This routine uses cinBuffer as temporary copy buffer. This is no longer - used since it contains the interpreted program to use in the update - and this has completed when this function is called. 
- - SYNOPSIS - req_struct The structure for temporary content - signal The signal object - regOperPtr The operation record - regFragPtr The fragment record - regTabPtr The table record - - RETURN VALUES - bool false if failed due to lack of memory - */ -bool -Dbtup::handle_growth_after_update(KeyReqStruct* req_struct, - Fragrecord* const regFragPtr, - Tablerec* const regTabPtr, - Uint32 growth_len) +int +Dbtup::realloc_var_part(Fragrecord* fragPtr, Tablerec* tabPtr, PagePtr pagePtr, + Var_part_ref* ref, Uint32 oldsz, Uint32 newsz) { - Uint32 vp_index, alloc_size, entry_len, curr_var_len; - Uint32 new_vp_index, new_vp_offset, new_page_ref; - Uint32 *copy_record= &cinBuffer[0]; - Ptr<Var_page> var_page= req_struct->var_page_ptr; - Var_page* page_header= var_page.p; - vp_index= req_struct->vp_index; - entry_len= var_page.p->get_entry_len(vp_index); - if (page_header->free_space >= growth_len) { - /** - * We will be able to handle the growth without changing the page - * and page index. - */ - if (page_header->largest_frag_size() >= entry_len + growth_len) { - ljam(); - /** - * In this case we need to copy the entry to the free space area of - * the page, it is not necessary to reorganise the page. - */ - MEMCOPY_NO_WORDS(page_header->get_free_space_ptr(), - page_header->get_ptr(vp_index), - entry_len); - page_header->set_entry_offset(vp_index, page_header->insert_pos); - page_header->insert_pos+= entry_len; - } else { - ljam(); + Uint32 add = newsz - oldsz; + Var_page* pageP = (Var_page*)pagePtr.p; + Local_key oldref; + oldref.assref(*(Uint32*)ref); + + if (pageP->free_space >= add) + { + jam(); + if(!pageP->is_space_behind_entry(oldref.m_page_idx, add)) + { + if(0) printf("extra reorg"); + jam(); /** * In this case we need to reorganise the page to fit. To ensure we * don't complicate matters we make a little trick here where we @@ -610,98 +216,49 @@ Dbtup::handle_growth_after_update(KeyReqStruct* req_struct, * that separately at the end. 
This means we need to copy it out of * the page before reorg_page to save the entry contents. */ - MEMCOPY_NO_WORDS(copy_record, - page_header->get_ptr(vp_index), - entry_len); - page_header->set_entry_len(vp_index, 0); - page_header->free_space+= entry_len; - reorg_page(page_header); - MEMCOPY_NO_WORDS(page_header->get_free_space_ptr(), - copy_record, - entry_len); - page_header->set_entry_offset(vp_index, page_header->insert_pos); - growth_len+= entry_len; - } - grow_entry(regFragPtr, - page_header, - vp_index, - growth_len); - return true; - } else { - /** - * It is necessary to allocate a segment from a new page. - */ - if (req_struct->fix_var_together) { - ljam(); - alloc_size= (entry_len + growth_len) - regTabPtr->tupheadsize; - curr_var_len= alloc_size - regTabPtr->var_array_wsize; - } else { - ljam(); - curr_var_len= entry_len - regTabPtr->var_array_wsize; - alloc_size= entry_len + growth_len; - } - Uint32* ptr, frag_page_id; - Local_key key; - if ((ptr= alloc_var_rec(regFragPtr, - regTabPtr, - alloc_size, - &key, &frag_page_id)) == 0) - { - /** - * No space existed for this growth. We need to abort the update. - */ - ljam(); - terrorCode= ZMEM_NOMEM_ERROR; - return false; + Uint32* copyBuffer= cinBuffer; + memcpy(copyBuffer, pageP->get_ptr(oldref.m_page_idx), 4*oldsz); + pageP->set_entry_len(oldref.m_page_idx, 0); + pageP->free_space += oldsz; + pageP->reorg((Var_page*)ctemp_page); + memcpy(pageP->get_free_space_ptr(), copyBuffer, 4*oldsz); + pageP->set_entry_offset(oldref.m_page_idx, pageP->insert_pos); + add += oldsz; } - - /* - * I need to be careful to copy the var_len_array before freeing it. - * The data part will be copied by copy_back_var_attr immediately - * after returning from this method. - * The updated var part is always in ctemp_var_record since I can - * never arrive here after a first insert. Thus no danger of the - * var part written being released. 
- */ - MEMCOPY_NO_WORDS(ptr, - req_struct->var_len_array, - regTabPtr->var_array_wsize); - req_struct->var_len_array= (Uint16*)ptr; - if (! req_struct->fix_var_together) { - ljam(); - /* - * We need to deallocate the old variable part. This new one will - * remain the variable part even if we abort the transaction. - * We don't keep multiple references to the variable parts. - * The copy data for abort is still kept in the copy record. - */ - free_separate_var_part(regFragPtr, regTabPtr, req_struct->m_tuple_ptr); - } else { - ljam(); - req_struct->fix_var_together= false; - } - page_header= (Var_page*)var_page.p; - new_page_ref= (key.m_page_no << MAX_TUPLES_BITS) + key.m_page_idx; - req_struct->m_tuple_ptr->m_data[regTabPtr->var_offset] = new_page_ref; - Uint32 bits= req_struct->m_tuple_ptr->m_header_bits; - req_struct->m_tuple_ptr->m_header_bits |= Tuple_header::CHAINED_ROW; - req_struct->var_page_ptr= var_page; - req_struct->vp_index= key.m_page_idx; + pageP->grow_entry(oldref.m_page_idx, add); + update_free_page_list(fragPtr, pagePtr); } - return true; + else + { + Local_key newref; + Uint32 *src = pageP->get_ptr(oldref.m_page_idx); + Uint32 *dst = alloc_var_part(fragPtr, tabPtr, newsz, &newref); + if (unlikely(dst == 0)) + return -1; + + ndbassert(oldref.m_page_no != newref.m_page_no); + ndbassert(pageP->get_entry_len(oldref.m_page_idx) == oldsz); + memcpy(dst, src, 4*oldsz); + * ((Uint32*)ref) = newref.ref(); + + pageP->free_record(oldref.m_page_idx, Var_page::CHAIN); + update_free_page_list(fragPtr, pagePtr); + } + + return 0; } -#endif /* ------------------------------------------------------------------------ */ // Get a page from one of free lists. If the desired free list is empty we // try with the next until we have tried all possible lists. 
/* ------------------------------------------------------------------------ */ -Uint32 Dbtup::get_alloc_page(Fragrecord* const frag_ptr, Uint32 alloc_size) +Uint32 +Dbtup::get_alloc_page(Fragrecord* fragPtr, Uint32 alloc_size) { - Uint32 i, start_index, loop_count= 0; - PagePtr page_ptr; - + Uint32 i, start_index, loop= 0; + PagePtr pagePtr; + start_index= calculate_free_list_impl(alloc_size); if (start_index == (MAX_FREE_LIST - 1)) { ljam(); @@ -712,37 +269,73 @@ Uint32 Dbtup::get_alloc_page(Fragrecord* const frag_ptr, Uint32 alloc_size) } for (i= start_index; i < MAX_FREE_LIST; i++) { ljam(); - if (frag_ptr->free_var_page_array[i] != RNIL) { + if (!fragPtr->free_var_page_array[i].isEmpty()) { ljam(); - return frag_ptr->free_var_page_array[i]; + return fragPtr->free_var_page_array[i].firstItem; } } ndbrequire(start_index > 0); i= start_index - 1; - page_ptr.i= frag_ptr->free_var_page_array[i]; - while ((page_ptr.i != RNIL) && (loop_count++ < 16)) { + LocalDLList<Page> list(c_page_pool, fragPtr->free_var_page_array[i]); + for(list.first(pagePtr); !pagePtr.isNull() && loop < 16; ) + { ljam(); - ptrCheckGuard(page_ptr, cnoOfPage, cpage); - Var_page* page_header= (Var_page*)page_ptr.p; - if (page_header->free_space >= alloc_size) { + if (pagePtr.p->free_space >= alloc_size) { ljam(); - return page_ptr.i; + return pagePtr.i; } - page_ptr.i= page_header->next_page; + loop++; + list.next(pagePtr); } return RNIL; } +Uint32 +Dbtup::get_empty_var_page(Fragrecord* fragPtr) +{ + PagePtr ptr; + LocalSLList<Page> list(c_page_pool, fragPtr->m_empty_pages); + if (list.remove_front(ptr)) + { + return ptr.i; + } + + Uint32 cnt; + allocConsPages(10, cnt, ptr.i); + if (unlikely(cnt == 0)) + { + return RNIL; + } + + PagePtr ret = ptr; + for (Uint32 i = 0; i<cnt; i++, ptr.i++) + { + c_page_pool.getPtr(ptr); + ptr.p->physical_page_id = ptr.i; + ptr.p->page_state = ZEMPTY_MM; + ptr.p->nextList = ptr.i + 1; + ptr.p->prevList = RNIL; + ptr.p->frag_page_id = RNIL; + } + + if (cnt > 1) + { + 
ptr.p->nextList = RNIL; + list.add(ret.i + 1, ptr); + } + + return ret.i; +} /* ------------------------------------------------------------------------ */ // Check if the page needs to go to a new free page list. /* ------------------------------------------------------------------------ */ -void Dbtup::update_free_page_list(Fragrecord* const frag_ptr, - Var_page* page_header) +void Dbtup::update_free_page_list(Fragrecord* fragPtr, + Ptr<Page> pagePtr) { Uint32 free_space, list_index; - free_space= page_header->free_space; - list_index= page_header->list_index; + free_space= pagePtr.p->free_space; + list_index= pagePtr.p->list_index; if ((free_space < c_min_list_size[list_index]) || (free_space > c_max_list_size[list_index])) { Uint32 new_list_index= calculate_free_list_impl(free_space); @@ -751,25 +344,30 @@ void Dbtup::update_free_page_list(Fragrecord* const frag_ptr, /* * Only remove it from its list if it is in a list */ - remove_free_page(frag_ptr, page_header, list_index); + LocalDLList<Page> + list(c_page_pool, fragPtr->free_var_page_array[list_index]); + list.remove(pagePtr); } if (free_space < c_min_list_size[new_list_index]) { /* - We have not sufficient amount of free space to put it into any - free list. Thus the page will not be available for new inserts. - This can only happen for the free list with least guaranteed free space. + We have not sufficient amount of free space to put it into any + free list. Thus the page will not be available for new inserts. + This can only happen for the free list with least guaranteed + free space. 
*/ ljam(); ndbrequire(new_list_index == 0); - page_header->list_index= MAX_FREE_LIST; + pagePtr.p->list_index= MAX_FREE_LIST; } else { ljam(); - insert_free_page(frag_ptr, page_header, new_list_index); + LocalDLList<Page> list(c_page_pool, + fragPtr->free_var_page_array[new_list_index]); + list.add(pagePtr); + pagePtr.p->list_index = new_list_index; } } } - /* ------------------------------------------------------------------------ */ // Given size of free space, calculate the free list to put it into /* ------------------------------------------------------------------------ */ @@ -787,60 +385,37 @@ Uint32 Dbtup::calculate_free_list_impl(Uint32 free_space_size) const return 0; } - -/* ------------------------------------------------------------------------ */ -// Remove a page from its current free list -/* ------------------------------------------------------------------------ */ -void Dbtup::remove_free_page(Fragrecord* frag_ptr, - Var_page* page_header, - Uint32 index) +Uint32* +Dbtup::alloc_var_rowid(Fragrecord* fragPtr, + Tablerec* tabPtr, + Uint32 alloc_size, + Local_key* key, + Uint32 * out_frag_page_id) { - Var_page* tmp_page_header; - if (page_header->prev_page == RNIL) { - ljam(); - ndbassert(index < MAX_FREE_LIST); - frag_ptr->free_var_page_array[index]= page_header->next_page; - } else { - ljam(); - PagePtr prev_page_ptr; - prev_page_ptr.i= page_header->prev_page; - ptrCheckGuard(prev_page_ptr, cnoOfPage, cpage); - tmp_page_header= (Var_page*)prev_page_ptr.p; - tmp_page_header->next_page= page_header->next_page; - } - if (page_header->next_page != RNIL) { - ljam(); - PagePtr next_page_ptr; - next_page_ptr.i= page_header->next_page; - ptrCheckGuard(next_page_ptr, cnoOfPage, cpage); - tmp_page_header= (Var_page*) next_page_ptr.p; - tmp_page_header->prev_page= page_header->prev_page; + tabPtr->m_offsets[MM].m_fix_header_size += Tuple_header::HeaderSize + 1; + Uint32 *ptr = alloc_fix_rowid(fragPtr, tabPtr, key, out_frag_page_id); + 
tabPtr->m_offsets[MM].m_fix_header_size -= Tuple_header::HeaderSize + 1; + if (unlikely(ptr == 0)) + { + return 0; } -} + ndbassert(alloc_size >= tabPtr->m_offsets[MM].m_fix_header_size + + Tuple_header::HeaderSize); + + alloc_size -= tabPtr->m_offsets[MM].m_fix_header_size + + Tuple_header::HeaderSize; -/* ------------------------------------------------------------------------ */ -// Insert a page into a free list on the fragment -/* ------------------------------------------------------------------------ */ -void Dbtup::insert_free_page(Fragrecord* frag_ptr, - Var_page* page_header, - Uint32 index) -{ - Var_page* tmp_page_header; - Uint32 current_head= frag_ptr->free_var_page_array[index]; - Uint32 pagePtrI = page_header->physical_page_id; - page_header->next_page= current_head; - ndbassert(index < MAX_FREE_LIST); - frag_ptr->free_var_page_array[index]= pagePtrI; - page_header->prev_page= RNIL; - page_header->list_index= index; - if (current_head != RNIL) { - ljam(); - PagePtr head_page_ptr; - head_page_ptr.i= current_head; - ptrCheckGuard(head_page_ptr, cnoOfPage, cpage); - tmp_page_header= (Var_page*)head_page_ptr.p; - tmp_page_header->prev_page= pagePtrI; + Local_key varref; + if (likely(alloc_var_part(fragPtr, tabPtr, alloc_size, &varref) != 0)) + { + Tuple_header* tuple = (Tuple_header*)ptr; + * tuple->get_var_part_ptr(tabPtr) = varref.ref(); + return ptr; } + + PagePtr pagePtr; + c_page_pool.getPtr(pagePtr, key->m_page_no); + free_fix_rec(fragPtr, tabPtr, key, (Fix_page*)pagePtr.p); + return 0; } - diff --git a/storage/ndb/src/kernel/blocks/dbtup/Undo_buffer.cpp b/storage/ndb/src/kernel/blocks/dbtup/Undo_buffer.cpp index 350d6ce8759..0adbec7d57b 100644 --- a/storage/ndb/src/kernel/blocks/dbtup/Undo_buffer.cpp +++ b/storage/ndb/src/kernel/blocks/dbtup/Undo_buffer.cpp @@ -50,39 +50,34 @@ Undo_buffer::alloc_copy_tuple(Local_key* dst, Uint32 words) m_tup->allocConsPages(1, count, m_first_free); if(count == 0) return 0; - page= 
(UndoPage*)(m_tup->cpage+m_first_free); + page= (UndoPage*)m_tup->c_page_pool.getPtr(m_first_free); page->m_state= ~ZFREE_COMMON; page->m_words_used= 0; page->m_ref_count= 0; } - if(m_first_free < m_tup->cnoOfPage) + page= (UndoPage*)m_tup->c_page_pool.getPtr(m_first_free); + + Uint32 pos= page->m_words_used; + if(words + pos > UndoPage::DATA_WORDS) { - page= (UndoPage*)(m_tup->cpage+m_first_free); - - Uint32 pos= page->m_words_used; - if(words + pos > UndoPage::DATA_WORDS) - { - m_first_free= RNIL; - return alloc_copy_tuple(dst, words); - } - - dst->m_page_no = m_first_free; - dst->m_page_idx = pos; - - page->m_ref_count++; - page->m_words_used = pos + words; - return page->m_data + pos; + m_first_free= RNIL; + return alloc_copy_tuple(dst, words); } - assert(false); - return 0; + + dst->m_page_no = m_first_free; + dst->m_page_idx = pos; + + page->m_ref_count++; + page->m_words_used = pos + words; + return page->m_data + pos; } void Undo_buffer::shrink_copy_tuple(Local_key* key, Uint32 words) { assert(key->m_page_no == m_first_free); - UndoPage* page= (UndoPage*)(m_tup->cpage+key->m_page_no); + UndoPage* page= (UndoPage*)m_tup->c_page_pool.getPtr(key->m_page_no); assert(page->m_words_used >= words); page->m_words_used -= words; } @@ -90,7 +85,7 @@ Undo_buffer::shrink_copy_tuple(Local_key* key, Uint32 words) void Undo_buffer::free_copy_tuple(Local_key* key) { - UndoPage* page= (UndoPage*)(m_tup->cpage+key->m_page_no); + UndoPage* page= (UndoPage*)m_tup->c_page_pool.getPtr(key->m_page_no); Uint32 cnt= page->m_ref_count; assert(cnt); @@ -115,6 +110,6 @@ Undo_buffer::free_copy_tuple(Local_key* key) Uint32 * Undo_buffer::get_ptr(Local_key* key) { - return ((UndoPage*)(m_tup->cpage+key->m_page_no))->m_data+key->m_page_idx; + return ((UndoPage*)(m_tup->c_page_pool.getPtr(key->m_page_no)))->m_data+key->m_page_idx; } diff --git a/storage/ndb/src/kernel/blocks/dbtup/test_varpage.cpp b/storage/ndb/src/kernel/blocks/dbtup/test_varpage.cpp index 31811ecfafb..dcbbc3c87d3 100644 
--- a/storage/ndb/src/kernel/blocks/dbtup/test_varpage.cpp +++ b/storage/ndb/src/kernel/blocks/dbtup/test_varpage.cpp @@ -9,14 +9,21 @@ struct Record Uint32* data; }; +NdbOut& +operator <<(NdbOut& out, const Record& rec) +{ + out << "[ idx: " << rec.idx << " sz: " << rec.size << " ]"; + return out; +} + #define TRACE(x) x static -void +bool cmp(const Uint32 *p1, const Uint32 *p2, Uint32 words) { if(memcmp(p1, p2, 4*words) == 0) - return; + return true; for(Uint32 i = 0; i<words; i++) printf(" %.8x", p1[i]); @@ -26,13 +33,20 @@ cmp(const Uint32 *p1, const Uint32 *p2, Uint32 words) printf(" %.8x", p2[i]); printf("\n"); - abort(); + return false; } static void -do_test(int loops, int dist[3]) +do_test(int loops, int dist[5]) { + fprintf(stderr, "do_test(%d, [ %d %d %d %d %d ])\n", + loops, + dist[0], + dist[1], + dist[2], + dist[3], + dist[4]); int allocated= 0; Record records[8192]; @@ -41,24 +55,39 @@ do_test(int loops, int dist[3]) for(int i = 0; i<loops; i++) { + assert(page.high_index + page.insert_pos <= page.DATA_WORDS); + for(int j = 0; j<allocated; j++) { Record rec= records[j]; Uint32* ptr= page.get_ptr(rec.idx); - cmp(ptr, rec.data, rec.size); + Uint32 pos = page.get_ptr(rec.idx) - page.m_data; + if (page.get_entry_len(rec.idx) != rec.size) + { + ndbout << "INVALID LEN " << j << " " << rec << " pos: " << pos << endl; + ndbout << page << endl; + abort(); + } + + if(!cmp(ptr, rec.data, rec.size)) + { + ndbout << "FAILED " << j << " " << rec << " pos: " << pos << endl; + ndbout << page << endl; + abort(); + } } loop: int op; int rnd= rand() % 100; - for(op= 0; op<3; op++) + for(op= 0; op<5; op++) if(rnd < dist[op]) break; if(allocated == 0) op= 0; if(page.free_space <= 2 && op == 0) goto loop; - + switch(op){ case 0: // Alloc { @@ -69,9 +98,73 @@ loop: { rec.data[i] = rand(); } - ndbout << "Alloc " << rec.size << flush; - rec.idx= page.alloc_record(rec.size, &tmp, 0); - ndbout << " -> " << rec.idx << endl; + ndbout << "Alloc hi: " << page.high_index << " (" << 
+ ((rnd < 30) ? "any" : + (rnd < 60) ? "dir" : + (rnd < 80) ? "exp" : "fail") << ") "; + ndbout << rec.size << flush; + if (rnd < 30) + { + rec.idx= page.alloc_record(rec.size, &tmp, 0); + } + else if (rnd < 60) + { + // Alloc with id, from directory + Vector<Uint32> free; + for(Uint32 i = page.high_index - 1; i > 0; i--) + { + if (page.get_index_word(i) & page.FREE) + { + free.push_back(i); + if (free.size() > 100) + break; + } + } + if (free.size()) + { + rec.idx = free[rand() % free.size()]; + if (page.alloc_record(rec.idx, rec.size, &tmp) != rec.idx) + { + abort(); + } + } + else + { + rec.idx = page.high_index; + if (page.alloc_record(rec.idx, rec.size, &tmp) != rec.idx) + { + if (rec.size + 1 != page.free_space) + abort(); + delete [] rec.data; + ndbout_c(" FAIL"); + break; + } + } + } + else if(rnd < 80) + { + // Alloc with id, outside of directory + rec.idx = page.high_index + (rand() % (page.free_space - rec.size)); + if (page.alloc_record(rec.idx, rec.size, &tmp) != rec.idx) + { + abort(); + } + } + else + { + rec.idx = page.high_index + (page.free_space - rec.size) + 1; + if (page.alloc_record(rec.idx, rec.size, &tmp) == rec.idx) + { + abort(); + } + delete [] rec.data; + ndbout_c(" FAIL"); + break; + } + + Uint32 pos = page.get_ptr(rec.idx) - page.m_data; + ndbout << " -> " << rec.idx + << " pos: " << pos << endl; Uint32* ptr= page.get_ptr(rec.idx); memcpy(ptr, rec.data, 4*rec.size); records[allocated++] = rec; @@ -81,12 +174,14 @@ loop: { int no= rand() % allocated; Record rec= records[no]; - ndbout << "Free no: " << no << " idx: " << rec.idx << endl; + Uint32 pos = page.get_ptr(rec.idx) - page.m_data; + ndbout << "Free hi: " << page.high_index << " no: " << no << " idx: " << rec.idx << " pos: " << pos << endl; Uint32* ptr= page.get_ptr(rec.idx); + assert(page.get_entry_len(rec.idx) == rec.size); cmp(ptr, rec.data, rec.size); delete[] rec.data; page.free_record(rec.idx, 0); - + for (unsigned k = no; k + 1 < allocated; k++) records[k] = records[k+1]; 
allocated--; @@ -98,8 +193,57 @@ loop: page.reorg(&tmp); break; case 3: - ndbout << "Expand" << endl; - + { + Uint32 free = page.free_space; + if (free <= 2) + { + goto shrink; + } + free /= 2; + int no = rand() % allocated; + Record rec= records[no]; + ndbout << "Expand no: " << no << " idx: " << rec.idx + << " add: " << free << " reorg: " + << !page.is_space_behind_entry(rec.idx, free) + << endl; + if (!page.is_space_behind_entry(rec.idx, free)) + { + Uint32 buffer[8192]; + Uint32 len = page.get_entry_len(rec.idx); + memcpy(buffer, page.get_ptr(rec.idx), 4*len); + page.set_entry_len(rec.idx, 0); + page.free_space += len; + page.reorg(&tmp); + memcpy(page.get_free_space_ptr(), buffer, 4*len); + page.set_entry_offset(rec.idx, page.insert_pos); + free += len; + records[no].size = 0; + } + + page.grow_entry(rec.idx, free); + records[no].size += free; + Uint32 *ptr = page.get_ptr(rec.idx); + Uint32 *new_data = new Uint32[records[no].size]; + for(Uint32 i= 0; i<records[no].size; i++) + { + ptr[i] = new_data[i] = rand(); + } + delete []rec.data; + records[no].data = new_data; + break; + } + case 4: + { + shrink: + int no = rand() % allocated; + Record rec = records[no]; + Uint32 sz = rec.size / 2 + 1; + ndbout << "Shrink no: " << no << " idx: " << rec.idx << " remove: " + << (rec.size - sz) << endl; + page.shrink_entry(rec.idx, sz); + records[no].size = sz; + break; + } } } @@ -107,19 +251,27 @@ loop: } int -main(void) +main(int argc, char **argv) { ndb_init(); + + if (argc > 1) + { + time_t seed = time(0); + srand(seed); + fprintf(stderr, "srand(%d)\n", seed); + } + // alloc, free, reorg, grow, shrink - int t1[] = { 30, 90, 100 }; - int t2[] = { 45, 90, 100 }; - int t3[] = { 60, 90, 100 }; - int t4[] = { 75, 90, 100 }; + int t1[] = { 10, 60, 70, 85, 100 }; + int t2[] = { 30, 60, 70, 85, 100 }; + int t3[] = { 50, 60, 70, 85, 100 }; do_test(10000, t1); do_test(10000, t2); do_test(10000, t3); - do_test(10000, t4); + + return 0; } template class Vector<Record>; diff --git 
a/storage/ndb/src/kernel/blocks/dbtup/tuppage.cpp b/storage/ndb/src/kernel/blocks/dbtup/tuppage.cpp index aaa0f1314c7..5adc034a68d 100644 --- a/storage/ndb/src/kernel/blocks/dbtup/tuppage.cpp +++ b/storage/ndb/src/kernel/blocks/dbtup/tuppage.cpp @@ -18,6 +18,20 @@ #include "tuppage.hpp" #include "Dbtup.hpp" +/** + * Fix pages maintain a double linked list of free entries + * + * Var pages has a directory where each entry is + * [ C(1), F(1), L(15), P(15) ] + * C is chain bit, (is it a full tuple or just chain) + * F is free bit + * If true, L is prev free entry (in directory) + * P is next free entry (in directory) + * else + * L is len of entry + * P is pos of entry + */ + Uint32 Tup_fixsize_page::alloc_record() { @@ -29,7 +43,7 @@ Tup_fixsize_page::alloc_record() Uint32 next = m_data[page_idx] & 0xFFFF; assert(prev == 0xFFFF); - assert(m_data[page_idx + 1] == Dbtup::Tuple_header::FREE); + assert(m_data[page_idx + 1] == FREE_RECORD); m_data[page_idx + 1] = 0; if (next != 0xFFFF) @@ -53,7 +67,7 @@ Uint32 Tup_fixsize_page::alloc_record(Uint32 page_idx) { assert(page_idx + 1 < DATA_WORDS); - if (likely(free_space && m_data[page_idx + 1] == Dbtup::Tuple_header::FREE)) + if (likely(free_space && m_data[page_idx + 1] == FREE_RECORD)) { Uint32 prev = m_data[page_idx] >> 16; Uint32 next = m_data[page_idx] & 0xFFFF; @@ -87,7 +101,7 @@ Tup_fixsize_page::free_record(Uint32 page_idx) Uint32 next = next_free_index; assert(page_idx + 1 < DATA_WORDS); - assert(m_data[page_idx + 1] != Dbtup::Tuple_header::FREE); + assert(m_data[page_idx + 1] != FREE_RECORD); if (next == 0xFFFF) { @@ -100,12 +114,12 @@ Tup_fixsize_page::free_record(Uint32 page_idx) Uint32 nextP = m_data[next]; assert((nextP >> 16) == 0xFFFF); m_data[next] = (page_idx << 16) | (nextP & 0xFFFF); - assert(m_data[next + 1] == Dbtup::Tuple_header::FREE); + assert(m_data[next + 1] == FREE_RECORD); } next_free_index = page_idx; m_data[page_idx] = 0xFFFF0000 | next; - m_data[page_idx + 1] = Dbtup::Tuple_header::FREE; + 
m_data[page_idx + 1] = FREE_RECORD; return ++free_space; } @@ -116,11 +130,123 @@ Tup_varsize_page::init() free_space= DATA_WORDS - 1; high_index= 1; insert_pos= 0; - next_free_index= 0xFFFF; + next_free_index= END_OF_FREE_LIST; m_page_header.m_page_type = File_formats::PT_Tup_varsize_page; } Uint32 +Tup_varsize_page::alloc_record(Uint32 page_idx, Uint32 alloc_size, + Tup_varsize_page* temp) +{ + assert(page_idx); // 0 is not allowed + Uint32 free = free_space; + Uint32 largest_size= DATA_WORDS - (insert_pos + high_index); + Uint32 free_list = next_free_index; + + if (page_idx < high_index) + { + Uint32 *ptr = get_index_ptr(page_idx); + Uint32 word = *ptr; + + if (unlikely((free < alloc_size) || ! (word & FREE))) + { + return ~0; + } + + if (alloc_size >= largest_size) + { + /* + We can't fit this segment between the insert position and the end of + the index entries. We will pack the page so that all free space + exists between the insert position and the end of the index entries. + */ + reorg(temp); + } + + Uint32 next = (word & NEXT_MASK) >> NEXT_SHIFT; + Uint32 prev = (word & PREV_MASK) >> PREV_SHIFT; + + if (next != END_OF_FREE_LIST) + { + Uint32 * next_ptr = get_index_ptr(next); + Uint32 next_word = * next_ptr; + * next_ptr = (next_word & ~PREV_MASK) | (prev << PREV_SHIFT); + } + + if (prev != END_OF_FREE_LIST) + { + Uint32 * prev_ptr = get_index_ptr(prev); + Uint32 prev_word = * prev_ptr; + * prev_ptr = (prev_word & ~NEXT_MASK) | (next << NEXT_SHIFT); + } + else + { + assert(next_free_index == page_idx); + next_free_index = next; + } + + * ptr = insert_pos + (alloc_size << LEN_SHIFT); + free -= alloc_size; + } + else + { + /** + * We need to expand directory + */ + Uint32 hi = high_index; + Uint32 expand = (page_idx + 1 - hi); + Uint32 size = alloc_size + expand; + if (unlikely(size > free)) + { + return ~0; + } + + if (size >= largest_size) + { + /* + We can't fit this segment between the insert position and the end of + the index entries. 
We will pack the page so that all free space + exists between the insert position and the end of the index entries. + */ + reorg(temp); + } + + Uint32 *ptr = m_data + DATA_WORDS - hi; + if (page_idx == hi) + { + * ptr = insert_pos + (alloc_size << LEN_SHIFT); + } + else + { + if (free_list != END_OF_FREE_LIST) + { + Uint32 * prev_ptr = get_index_ptr(free_list); + Uint32 prev_word = * prev_ptr; + * prev_ptr = (prev_word & ~PREV_MASK) | (hi << PREV_SHIFT); + } + + for (; hi < page_idx;) + { + * ptr-- = FREE | (free_list << NEXT_SHIFT) | ((hi+1) << PREV_SHIFT); + free_list = hi++; + } + + * ptr++ = insert_pos + (alloc_size << LEN_SHIFT); + * ptr = ((* ptr) & ~PREV_MASK) | (END_OF_FREE_LIST << PREV_SHIFT); + + next_free_index = hi - 1; + } + high_index = hi + 1; + free -= size; + } + + free_space = free; + insert_pos += alloc_size; + + return page_idx; +} + +Uint32 Tup_varsize_page::alloc_record(Uint32 alloc_size, Tup_varsize_page* temp, Uint32 chain) { @@ -138,7 +264,7 @@ Tup_varsize_page::alloc_record(Uint32 alloc_size, assert(largest_size > alloc_size); Uint32 page_idx; - if (next_free_index == 0xFFFF) { + if (next_free_index == END_OF_FREE_LIST) { /* We are out of free index slots. 
We will extend the array of free slots @@ -148,12 +274,21 @@ Tup_varsize_page::alloc_record(Uint32 alloc_size, } else { // Pick an empty slot among the index entries page_idx= next_free_index; - assert((get_index_word(page_idx) & 0xFFFF0000) == 0); - next_free_index= get_index_word(page_idx); + assert((get_index_word(page_idx) & FREE) == FREE); + assert(((get_index_word(page_idx) & PREV_MASK) >> PREV_SHIFT) == + END_OF_FREE_LIST); + next_free_index= (get_index_word(page_idx) & NEXT_MASK) >> NEXT_SHIFT; + assert(next_free_index); + if (next_free_index != END_OF_FREE_LIST) + { + Uint32 *ptr = get_index_ptr(next_free_index); + Uint32 word = *ptr; + * ptr = (word & ~PREV_MASK) | (END_OF_FREE_LIST << PREV_SHIFT); + } } assert(chain == 0 || chain == CHAIN); - * get_index_ptr(page_idx) = insert_pos + ((chain + alloc_size) << 16); + * get_index_ptr(page_idx) = insert_pos + chain + (alloc_size << LEN_SHIFT); insert_pos += alloc_size; free_space -= alloc_size; @@ -167,10 +302,10 @@ Tup_varsize_page::free_record(Uint32 page_idx, Uint32 chain) //ndbout_c("%p->free_record(%d%s)", this, page_idx, (chain ? 
" CHAIN": "")); Uint32 *index_ptr= get_index_ptr(page_idx); Uint32 index_word= * index_ptr; - Uint32 entry_pos= index_word & 0xFFFF; - Uint32 entry_len= (index_word >> 16) & ~CHAIN; + Uint32 entry_pos= (index_word & POS_MASK) >> POS_SHIFT; + Uint32 entry_len= (index_word & LEN_MASK) >> LEN_SHIFT; assert(chain == 0 || chain == CHAIN); - assert(!(((index_word >> 16) ^ chain) & 0x8000)); + assert((index_word & CHAIN) == chain); #ifdef VM_TRACE memset(m_data + entry_pos, 0xF2, 4*entry_len); #endif @@ -183,8 +318,16 @@ Tup_varsize_page::free_record(Uint32 page_idx, Uint32 chain) */ rebuild_index(index_ptr); } else { - * index_ptr= next_free_index; + if (next_free_index != END_OF_FREE_LIST) + { + Uint32 *ptr = get_index_ptr(next_free_index); + Uint32 word = *ptr; + assert(((word & PREV_MASK) >> PREV_SHIFT) == END_OF_FREE_LIST); + * ptr = (word & ~PREV_MASK) | (page_idx << PREV_SHIFT); + } + * index_ptr= FREE | next_free_index | (END_OF_FREE_LIST << PREV_SHIFT); next_free_index= page_idx; + assert(next_free_index); } free_space+= entry_len; @@ -204,7 +347,7 @@ Tup_varsize_page::rebuild_index(Uint32* index_ptr) * Scan until you find first non empty index pos */ for(index_ptr++; index_ptr < end; index_ptr++) - if((* index_ptr >> 16) == 0) + if((* index_ptr) & FREE) empty++; else break; @@ -214,23 +357,30 @@ Tup_varsize_page::rebuild_index(Uint32* index_ptr) // Totally free page high_index = 1; free_space += empty; - next_free_index= 0xFFFF; + next_free_index = END_OF_FREE_LIST; return; } - - Uint32 next= 0xFFFF; - high_index -= empty; + + Uint32 next= END_OF_FREE_LIST; + Uint32 dummy; + Uint32 *prev_ptr = &dummy; for(index_ptr++; index_ptr < end; index_ptr++) { - if((* index_ptr >> 16) == 0) + if ((* index_ptr) & FREE) { - * index_ptr= next; + * index_ptr= FREE | next; next= (end - index_ptr); + * prev_ptr |= (next << PREV_SHIFT); + prev_ptr = index_ptr; } } + * prev_ptr |= (END_OF_FREE_LIST << PREV_SHIFT); + + high_index -= empty; free_space += empty; next_free_index= 
next; + assert(next_free_index); } void @@ -247,16 +397,17 @@ Tup_varsize_page::reorg(Tup_varsize_page* copy_page) for (; index_ptr < end_of_page; index_ptr++) { Uint32 index_word= * index_ptr; - Uint32 entry_len= (index_word >> 16) & ~CHAIN; - if (entry_len != 0) { + Uint32 entry_len= (index_word & LEN_MASK) >> LEN_SHIFT; + if (!(index_word & FREE) && entry_len) + { /* We found an index item that needs to be packed. We will update the index entry and copy the data to the page. */ - Uint32 entry_pos= index_word & 0xffff; + Uint32 entry_pos= (index_word & POS_MASK) >> POS_SHIFT; assert(entry_pos + entry_len <= old_insert_pos); assert(new_insert_pos + entry_len <= old_insert_pos); - * index_ptr= new_insert_pos + (index_word & 0xFFFF0000); + * index_ptr= (new_insert_pos << POS_SHIFT) + (index_word & ~POS_MASK); memcpy(m_data+new_insert_pos, copy_page->m_data+entry_pos, 4*entry_len); new_insert_pos += entry_len; @@ -278,10 +429,10 @@ operator<< (NdbOut& out, const Tup_varsize_page& page) for(Uint32 i = 1; i<page.high_index; i++, index_ptr--) { out << " [ " << i; - if(*index_ptr >> 16) - out << " pos: " << ((*index_ptr) & 0xFFFF) - << " len: " << ((*index_ptr >> 16) & ~page.CHAIN) - << (((* index_ptr >> 16) & page.CHAIN) ? " CHAIN " : " ") + if(! (*index_ptr & page.FREE)) + out << " pos: " << ((* index_ptr & page.POS_MASK) >> page.POS_SHIFT) + << " len: " << ((* index_ptr & page.LEN_MASK) >> page.LEN_SHIFT) + << ((* index_ptr & page.CHAIN) ? 
" CHAIN " : " ") << "]" << flush; else out << " FREE ]" << flush; @@ -289,10 +440,10 @@ operator<< (NdbOut& out, const Tup_varsize_page& page) out << " free list: " << flush; Uint32 next= page.next_free_index; - while(next != 0xFFFF) + while(next != page.END_OF_FREE_LIST) { out << next << " " << flush; - next= * (page.m_data+page.DATA_WORDS-next); + next= ((* (page.m_data+page.DATA_WORDS-next)) & page.NEXT_MASK) >> page.NEXT_SHIFT; } out << "]"; return out; diff --git a/storage/ndb/src/kernel/blocks/dbtup/tuppage.hpp b/storage/ndb/src/kernel/blocks/dbtup/tuppage.hpp index beeb85d063b..04ed18da58d 100644 --- a/storage/ndb/src/kernel/blocks/dbtup/tuppage.hpp +++ b/storage/ndb/src/kernel/blocks/dbtup/tuppage.hpp @@ -81,6 +81,7 @@ struct Tup_fixsize_page Uint32 m_extent_info_ptr; Uint32 unused_ph[9]; + STATIC_CONST( FREE_RECORD = ~(Uint32)0 ); STATIC_CONST( DATA_WORDS = File_formats::NDB_PAGE_SIZE_WORDS - 32 ); Uint32 m_data[DATA_WORDS]; @@ -127,7 +128,18 @@ struct Tup_varsize_page Uint32 unused_ph[7]; STATIC_CONST( DATA_WORDS = File_formats::NDB_PAGE_SIZE_WORDS - 32 ); - STATIC_CONST( CHAIN = 0x8000 ); + STATIC_CONST( CHAIN = 0x80000000 ); + STATIC_CONST( FREE = 0x40000000 ); + STATIC_CONST( LEN_MASK = 0x3FFF8000 ); + STATIC_CONST( POS_MASK = 0x00007FFF ); + STATIC_CONST( LEN_SHIFT = 15 ); + STATIC_CONST( POS_SHIFT = 0 ); + STATIC_CONST( END_OF_FREE_LIST = POS_MASK ); + + STATIC_CONST( NEXT_MASK = POS_MASK ); + STATIC_CONST( NEXT_SHIFT = POS_SHIFT ); + STATIC_CONST( PREV_MASK = LEN_MASK ); + STATIC_CONST( PREV_SHIFT = LEN_SHIFT ); Uint32 m_data[DATA_WORDS]; @@ -156,6 +168,12 @@ struct Tup_varsize_page * temp is used when having to reorg page before allocating */ Uint32 alloc_record(Uint32 size, Tup_varsize_page* temp, Uint32 chain); + + /** + * Alloc page_idx from page, return page_idx + * temp is used when having to reorg page before allocating + */ + Uint32 alloc_record(Uint32 page_idx, Uint32 size, Tup_varsize_page* temp); /** * Free record from page @@ -170,8 
+188,8 @@ struct Tup_varsize_page */ bool is_space_behind_entry(Uint32 page_index, Uint32 growth_len) const { Uint32 idx= get_index_word(page_index); - Uint32 pos= idx & 0xFFFF; - Uint32 len= (idx >> 16) & ~CHAIN; + Uint32 pos= (idx & POS_MASK) >> POS_SHIFT; + Uint32 len= (idx & LEN_MASK) >> LEN_SHIFT; if ((pos + len == insert_pos) && (insert_pos + growth_len < DATA_WORDS - high_index)) return true; @@ -180,12 +198,14 @@ struct Tup_varsize_page void grow_entry(Uint32 page_index, Uint32 growth_len) { assert(free_space >= growth_len); - + Uint32 *pos= get_index_ptr(page_index); Uint32 idx= *pos; - Uint32 size= (idx >> 16) + growth_len; - *pos= (idx & 0xFFFF) + (size << 16); - assert((idx & 0xFFFF) + ((idx >> 16) & ~CHAIN) == insert_pos); + assert(! (idx & FREE)); + assert((((idx & POS_MASK) >> POS_SHIFT) + ((idx & LEN_MASK) >> LEN_SHIFT)) + == insert_pos); + + * pos= idx + (growth_len << LEN_SHIFT); insert_pos+= growth_len; free_space-= growth_len; } @@ -193,35 +213,42 @@ struct Tup_varsize_page void shrink_entry(Uint32 page_index, Uint32 new_size){ Uint32 *pos= get_index_ptr(page_index); Uint32 idx= *pos; - *pos= (idx & (CHAIN << 16 | 0xFFFF)) + (new_size << 16); - Uint32 old_size= (idx >> 16) & ~CHAIN; - + Uint32 old_pos = (idx & POS_MASK) >> POS_SHIFT; + Uint32 old_size = (idx & LEN_MASK) >> LEN_SHIFT; + + assert( ! 
(idx & FREE)); assert(old_size >= new_size); + + * pos= (idx & ~LEN_MASK) + (new_size << LEN_SHIFT); Uint32 shrink = old_size - new_size; #ifdef VM_TRACE - memset(m_data + (idx & 0xFFFF) + new_size, 0xF1, 4 * shrink); + memset(m_data + old_pos + new_size, 0xF1, 4 * shrink); #endif free_space+= shrink; - if(insert_pos == ((idx & 0xFFFF) + old_size)) + if(insert_pos == (old_pos + old_size)) insert_pos -= shrink; } Uint32* get_ptr(Uint32 page_idx) { - return m_data + (get_index_word(page_idx) & 0xFFFF); + return m_data + ((get_index_word(page_idx) & POS_MASK) >> POS_SHIFT); } void set_entry_offset(Uint32 page_idx, Uint32 offset){ Uint32 *pos= get_index_ptr(page_idx); - *pos = (* pos & 0xFFFF0000) + offset; + * pos = (* pos & ~POS_MASK) + (offset << POS_SHIFT); } + void set_entry_len(Uint32 page_idx, Uint32 len) { + Uint32 *pos= get_index_ptr(page_idx); + * pos = (*pos & ~LEN_MASK) + (len << LEN_SHIFT); + } + Uint32 get_entry_len(Uint32 page_idx) const { - return get_index_word(page_idx) >> 16; + return (get_index_word(page_idx) & LEN_MASK) >> LEN_SHIFT; } - void set_entry_len(Uint32 page_idx, Uint32 len) { - Uint32 *pos= get_index_ptr(page_idx); - *pos = (len << 16) + (*pos & (CHAIN << 16 | 0xFFFF)); + Uint32 get_entry_chain(Uint32 page_idx) const { + return get_index_word(page_idx) & CHAIN; } }; diff --git a/storage/ndb/src/kernel/blocks/lgman.cpp b/storage/ndb/src/kernel/blocks/lgman.cpp index 66ef87ce4d6..2997482c4bf 100644 --- a/storage/ndb/src/kernel/blocks/lgman.cpp +++ b/storage/ndb/src/kernel/blocks/lgman.cpp @@ -26,6 +26,7 @@ #include <signaldata/LCP.hpp> #include <signaldata/SumaImpl.hpp> #include <signaldata/LgmanContinueB.hpp> +#include <signaldata/GetTabInfo.hpp> #include "ndbfs/Ndbfs.hpp" #include "dbtup/Dbtup.hpp" @@ -86,7 +87,9 @@ Lgman::Lgman(const Configuration & conf) : addRecSignal(GSN_START_RECREQ, &Lgman::execSTART_RECREQ); addRecSignal(GSN_END_LCP_CONF, &Lgman::execEND_LCP_CONF); - + + addRecSignal(GSN_GET_TABINFOREQ, 
&Lgman::execGET_TABINFOREQ); + m_last_lsn = 0; m_logfile_group_pool.setSize(10); m_logfile_group_hash.setSize(10); @@ -701,6 +704,7 @@ Lgman::create_file_commit(Signal* signal, ptr.p->m_state = Undofile::FS_SORTING; } + ptr.p->m_online.m_lsn = 0; ptr.p->m_online.m_outstanding = 0; Uint64 add= ptr.p->m_file_size - 1; @@ -1648,7 +1652,7 @@ Lgman::execLCP_FRAG_ORD(Signal* signal) sendSignal(reference(), GSN_CONTINUEB, signal, 2, JBB); } - if(!ptr.isNull()) + if(!ptr.isNull() && ptr.p->m_last_lsn) { Uint32 undo[3]; undo[0] = lcp_id; @@ -1686,24 +1690,26 @@ Lgman::execLCP_FRAG_ORD(Signal* signal) while(!ptr.isNull()) { - /** - * First LCP_FRAGORD for each LCP, sets tail pos - */ - if(m_latest_lcp != lcp_id) + if (ptr.p->m_last_lsn) { - ptr.p->m_tail_pos[0] = ptr.p->m_tail_pos[1]; - ptr.p->m_tail_pos[1] = ptr.p->m_tail_pos[2]; - ptr.p->m_tail_pos[2] = ptr.p->m_file_pos[HEAD]; + /** + * First LCP_FRAGORD for each LCP, sets tail pos + */ + if(m_latest_lcp != lcp_id) + { + ptr.p->m_tail_pos[0] = ptr.p->m_tail_pos[1]; + ptr.p->m_tail_pos[1] = ptr.p->m_tail_pos[2]; + ptr.p->m_tail_pos[2] = ptr.p->m_file_pos[HEAD]; + } + + if(0) + ndbout_c + ("execLCP_FRAG_ORD (%d %d) (%d %d) (%d %d) free pages: %d", + ptr.p->m_tail_pos[0].m_ptr_i, ptr.p->m_tail_pos[0].m_idx, + ptr.p->m_tail_pos[1].m_ptr_i, ptr.p->m_tail_pos[1].m_idx, + ptr.p->m_tail_pos[2].m_ptr_i, ptr.p->m_tail_pos[2].m_idx, + (ptr.p->m_free_file_words / File_formats::UNDO_PAGE_WORDS)); } - - if(0) - ndbout_c - ("execLCP_FRAG_ORD (%d %d) (%d %d) (%d %d) free pages: %d", - ptr.p->m_tail_pos[0].m_ptr_i, ptr.p->m_tail_pos[0].m_idx, - ptr.p->m_tail_pos[1].m_ptr_i, ptr.p->m_tail_pos[1].m_idx, - ptr.p->m_tail_pos[2].m_ptr_i, ptr.p->m_tail_pos[2].m_idx, - (ptr.p->m_free_file_words / File_formats::UNDO_PAGE_WORDS)); - m_logfile_group_list.next(ptr); } @@ -1761,47 +1767,50 @@ Lgman::endlcp_callback(Signal* signal, Uint32 ptr, Uint32 res) void Lgman::cut_log_tail(Signal* signal, Ptr<Logfile_group> ptr) { - Buffer_idx tmp= 
ptr.p->m_tail_pos[0]; - Buffer_idx tail= ptr.p->m_file_pos[TAIL]; - - Ptr<Undofile> filePtr; - m_file_pool.getPtr(filePtr, tail.m_ptr_i); - bool done= true; - if(!(tmp == tail)) + if (likely(ptr.p->m_last_lsn)) { - Uint32 free; - if(tmp.m_ptr_i == tail.m_ptr_i && tail.m_idx < tmp.m_idx) - { - free= tmp.m_idx - tail.m_idx; - ptr.p->m_free_file_words += free * File_formats::UNDO_PAGE_WORDS; - ptr.p->m_file_pos[TAIL] = tmp; - } - else + Buffer_idx tmp= ptr.p->m_tail_pos[0]; + Buffer_idx tail= ptr.p->m_file_pos[TAIL]; + + Ptr<Undofile> filePtr; + m_file_pool.getPtr(filePtr, tail.m_ptr_i); + + if(!(tmp == tail)) { - free= filePtr.p->m_file_size - tail.m_idx - 1; - ptr.p->m_free_file_words += free * File_formats::UNDO_PAGE_WORDS; - - Ptr<Undofile> next = filePtr; - LocalDLFifoList<Undofile> files(m_file_pool, ptr.p->m_files); - while(files.next(next) && (next.p->m_state & Undofile::FS_EMPTY)) - ndbassert(next.i != filePtr.i); - if(next.isNull()) + Uint32 free; + if(tmp.m_ptr_i == tail.m_ptr_i && tail.m_idx < tmp.m_idx) { - jam(); - files.first(next); - while((next.p->m_state & Undofile::FS_EMPTY) && files.next(next)) + free= tmp.m_idx - tail.m_idx; + ptr.p->m_free_file_words += free * File_formats::UNDO_PAGE_WORDS; + ptr.p->m_file_pos[TAIL] = tmp; + } + else + { + free= filePtr.p->m_file_size - tail.m_idx - 1; + ptr.p->m_free_file_words += free * File_formats::UNDO_PAGE_WORDS; + + Ptr<Undofile> next = filePtr; + LocalDLFifoList<Undofile> files(m_file_pool, ptr.p->m_files); + while(files.next(next) && (next.p->m_state & Undofile::FS_EMPTY)) ndbassert(next.i != filePtr.i); + if(next.isNull()) + { + jam(); + files.first(next); + while((next.p->m_state & Undofile::FS_EMPTY) && files.next(next)) + ndbassert(next.i != filePtr.i); + } + + tmp.m_idx= 0; + tmp.m_ptr_i= next.i; + ptr.p->m_file_pos[TAIL] = tmp; + done= false; } - - tmp.m_idx= 0; - tmp.m_ptr_i= next.i; - ptr.p->m_file_pos[TAIL] = tmp; - done= false; - } - } - - validate_logfile_group(ptr, "cut log"); + } + + 
validate_logfile_group(ptr, "cut log"); + } if (done) { @@ -2946,3 +2955,71 @@ Lgman::validate_logfile_group(Ptr<Logfile_group> ptr, const char * heading) } } #endif + +void Lgman::execGET_TABINFOREQ(Signal* signal) +{ + jamEntry(); + + if(!assembleFragments(signal)) + { + return; + } + + GetTabInfoReq * const req = (GetTabInfoReq *)&signal->theData[0]; + + const Uint32 reqType = req->requestType & (~GetTabInfoReq::LongSignalConf); + BlockReference retRef= req->senderRef; + Uint32 senderData= req->senderData; + Uint32 tableId= req->tableId; + + if(reqType == GetTabInfoReq::RequestByName){ + jam(); + if(signal->getNoOfSections()) + releaseSections(signal); + + sendGET_TABINFOREF(signal, req, GetTabInfoRef::NoFetchByName); + return; + } + + Logfile_group key; + key.m_logfile_group_id= tableId; + Ptr<Logfile_group> ptr; + m_logfile_group_hash.find(ptr, key); + + if(ptr.p->m_logfile_group_id != tableId) + { + jam(); + if(signal->getNoOfSections()) + releaseSections(signal); + + sendGET_TABINFOREF(signal, req, GetTabInfoRef::InvalidTableId); + return; + } + + + GetTabInfoConf *conf = (GetTabInfoConf *)&signal->theData[0]; + + conf->senderData= senderData; + conf->tableId= tableId; + conf->freeWordsHi= ptr.p->m_free_file_words >> 32; + conf->freeWordsLo= ptr.p->m_free_file_words & 0xFFFFFFFF; + conf->tableType= DictTabInfo::LogfileGroup; + conf->senderRef= reference(); + sendSignal(retRef, GSN_GET_TABINFO_CONF, signal, + GetTabInfoConf::SignalLength, JBB); +} + +void Lgman::sendGET_TABINFOREF(Signal* signal, + GetTabInfoReq * req, + GetTabInfoRef::ErrorCode errorCode) +{ + jamEntry(); + GetTabInfoRef * const ref = (GetTabInfoRef *)&signal->theData[0]; + /** + * The format of GetTabInfo Req/Ref is the same + */ + BlockReference retRef = req->senderRef; + ref->errorCode = errorCode; + + sendSignal(retRef, GSN_GET_TABINFOREF, signal, signal->length(), JBB); +} diff --git a/storage/ndb/src/kernel/blocks/lgman.hpp b/storage/ndb/src/kernel/blocks/lgman.hpp index 
745a1bfa45a..840f1393b5a 100644 --- a/storage/ndb/src/kernel/blocks/lgman.hpp +++ b/storage/ndb/src/kernel/blocks/lgman.hpp @@ -26,6 +26,7 @@ #include <DLHashTable.hpp> #include <NodeBitmask.hpp> #include "diskpage.hpp" +#include <signaldata/GetTabInfo.hpp> class Lgman : public SimulatedBlock { @@ -66,6 +67,13 @@ protected: void execSTART_RECREQ(Signal*); void execEND_LCP_CONF(Signal*); + + void execGET_TABINFOREQ(Signal*); + + void sendGET_TABINFOREF(Signal* signal, + GetTabInfoReq * req, + GetTabInfoRef::ErrorCode errorCode); + public: struct Log_waiter { diff --git a/storage/ndb/src/kernel/blocks/ndbfs/AsyncFile.cpp b/storage/ndb/src/kernel/blocks/ndbfs/AsyncFile.cpp index 75ab83b2e98..6947a4902a1 100644 --- a/storage/ndb/src/kernel/blocks/ndbfs/AsyncFile.cpp +++ b/storage/ndb/src/kernel/blocks/ndbfs/AsyncFile.cpp @@ -341,12 +341,14 @@ void AsyncFile::openReq(Request* request) #endif } +#ifndef NDB_NO_O_DIRECT /* to allow tmpfs */ #ifdef O_DIRECT if (flags & FsOpenReq::OM_DIRECT) { new_flags |= O_DIRECT; } #endif +#endif switch(flags & 0x3){ case FsOpenReq::OM_READONLY: diff --git a/storage/ndb/src/kernel/blocks/pgman.cpp b/storage/ndb/src/kernel/blocks/pgman.cpp index 7ff3ab7eff4..7525f9c2402 100644 --- a/storage/ndb/src/kernel/blocks/pgman.cpp +++ b/storage/ndb/src/kernel/blocks/pgman.cpp @@ -1359,8 +1359,8 @@ void Pgman::fsreadreq(Signal* signal, Ptr<Page_entry> ptr) { File_map::ConstDataBufferIterator it; - m_file_map.first(it); - m_file_map.next(it, ptr.p->m_file_no); + bool ret = m_file_map.first(it) && m_file_map.next(it, ptr.p->m_file_no); + ndbrequire(ret); Uint32 fd = * it.data; ndbrequire(ptr.p->m_page_no > 0); @@ -1479,11 +1479,6 @@ Pgman::get_page(Signal* signal, Ptr<Page_entry> ptr, Page_request page_req) { busy_count = true; state |= Page_entry::BUSY; - /* - * Consider commit to be correlated. Otherwise pk op + commit makes - * the page hot. XXX move to TUP which knows better. 
- */ - req_flags |= Page_request::CORR_REQ; } else if ((req_flags & Page_request::OP_MASK) != ZREAD) { @@ -2196,11 +2191,14 @@ Pgman::execDUMP_STATE_ORD(Signal* signal) } } + if (signal->theData[0] == 11003) { #ifdef VM_TRACE verify_page_lists(); dump_page_lists(); +#else + ndbout << "Only in VM_TRACE builds" << endl; #endif } } diff --git a/storage/ndb/src/kernel/blocks/pgman.hpp b/storage/ndb/src/kernel/blocks/pgman.hpp index da59afa5794..b616e169381 100644 --- a/storage/ndb/src/kernel/blocks/pgman.hpp +++ b/storage/ndb/src/kernel/blocks/pgman.hpp @@ -518,6 +518,7 @@ public: ,DIRTY_REQ = Pgman::Page_request::DIRTY_REQ ,NO_HOOK = Pgman::Page_request::NO_HOOK ,UNLOCK_PAGE = Pgman::Page_request::UNLOCK_PAGE + ,CORR_REQ = Pgman::Page_request::CORR_REQ }; /** diff --git a/storage/ndb/src/kernel/blocks/restore.cpp b/storage/ndb/src/kernel/blocks/restore.cpp index 31dea0dfcdf..c4cac7abe74 100644 --- a/storage/ndb/src/kernel/blocks/restore.cpp +++ b/storage/ndb/src/kernel/blocks/restore.cpp @@ -817,7 +817,7 @@ Restore::parse_table_description(Signal* signal, FilePtr file_ptr, c.m_flags |= (tmp.AttributeStorageType == NDB_STORAGETYPE_DISK ? Column::COL_DISK : 0); - if(lcp && c.m_flags & Column::COL_DISK) + if(lcp && (c.m_flags & Column::COL_DISK)) { /** * Restore does not currently handle disk attributes @@ -829,7 +829,6 @@ Restore::parse_table_description(Signal* signal, FilePtr file_ptr, if(!tmp.AttributeNullableFlag && !varsize) { - c.m_nulloffset = 0; if(!columns.append(_align, sizeof(Column)/sizeof(Uint32))) { parse_error(signal, file_ptr, __LINE__, i); @@ -838,53 +837,55 @@ Restore::parse_table_description(Signal* signal, FilePtr file_ptr, } else if (true) // null mask dropped in 5.1 { - c.m_nulloffset = (tmp.AttributeNullableFlag != 0); - if (varsize) - c.m_flags |= Column::COL_VAR; + c.m_flags |= (varsize ? Column::COL_VAR : 0); + c.m_flags |= (tmp.AttributeNullableFlag ? 
Column::COL_NULL : 0); if(!columns.append(_align, sizeof(Column)/sizeof(Uint32))) { parse_error(signal, file_ptr, __LINE__, i); return; } } - else + } + + if(lcp) + { + if (disk) { - c.m_nulloffset = 1 + null_offset++; - c.m_flags |= Column::COL_VAR; - if(!variable.append(_align, sizeof(Column)/sizeof(Uint32))) + c.m_id = AttributeHeader::DISK_REF; + c.m_size = 2; + c.m_flags = 0; + if(!columns.append(_align, sizeof(Column)/sizeof(Uint32))) { - parse_error(signal, file_ptr, __LINE__, i); + parse_error(signal, file_ptr, __LINE__, 0); return; } } - } - if(lcp && disk) - { - c.m_id = AttributeHeader::DISK_REF; - c.m_size = 2; - c.m_nulloffset = 0; - c.m_flags = 0; - if(!columns.append(_align, sizeof(Column)/sizeof(Uint32))) { - parse_error(signal, file_ptr, __LINE__, 0); - return; + c.m_id = AttributeHeader::ROWID; + c.m_size = 2; + c.m_flags = 0; + if(!columns.append(_align, sizeof(Column)/sizeof(Uint32))) + { + parse_error(signal, file_ptr, __LINE__, 0); + return; + } } - } - - file_ptr.p->m_table_version = tmpTab.TableVersion; - file_ptr.p->m_null_bitmask_size = (null_offset + 31)/32; -#if 0 - List::Iterator cols; - for(variable.first(cols); !cols.isNull(); variable.next(cols)) - { - if(!columns.append(cols.data, 1)) + + if (tmpTab.RowGCIFlag) { - parse_error(signal, file_ptr, __LINE__, 0); + c.m_id = AttributeHeader::ROW_GCI; + c.m_size = 2; + c.m_flags = 0; + if(!columns.append(_align, sizeof(Column)/sizeof(Uint32))) + { + parse_error(signal, file_ptr, __LINE__, 0); + return; + } } } - return ; -#endif + + file_ptr.p->m_table_version = tmpTab.TableVersion; } void @@ -927,11 +928,8 @@ Restore::parse_record(Signal* signal, FilePtr file_ptr, Uint32 * const key_start = signal->getDataPtrSend()+24; Uint32 * const attr_start = key_start + MAX_KEY_SIZE_IN_WORDS; - Uint32 nulls= file_ptr.p->m_null_bitmask_size; - const Uint32 *null_mask= data+1; - data += (1+nulls); + data += 1; const Uint32* const dataStart = data; - //if (file_ptr.p->m_table_id >= 2) { for (uint ii = 
0; ii+1<len; ii++) ndbout << hex << dataStart[ii]; ndbout << endl; } Uint32 *keyData = key_start; Uint32 *attrData = attr_start; @@ -939,19 +937,36 @@ Restore::parse_record(Signal* signal, FilePtr file_ptr, Column c; Uint32 _align[1]; }; - bool disk= false; + bool disk = false; + bool rowid = false; + bool gci = false; - //if (file_ptr.p->m_table_id >= 2) { ndbout << "*** "; columns.first(it); while (!it.isNull()) { _align[0] = *it.data; columns.next(it); _align[1] = *it.data; columns.next(it); ndbout << c << " "; } ndbout << endl; } - - Uint32 column_no = 0; + Uint64 gci_val; + Local_key rowid_val; columns.first(it); while(!it.isNull()) { _align[0] = *it.data; ndbrequire(columns.next(it)); _align[1] = *it.data; columns.next(it); - if (! (c.m_flags & Column::COL_VAR) && - ! c.m_nulloffset) + if (c.m_id == AttributeHeader::ROWID) + { + rowid_val.m_page_no = data[0]; + rowid_val.m_page_idx = data[1]; + data += 2; + rowid = true; + continue; + } + + if (c.m_id == AttributeHeader::ROW_GCI) + { + memcpy(&gci_val, data, 8); + data += 2; + gci = true; + continue; + } + + if (! (c.m_flags & (Column::COL_VAR | Column::COL_NULL))) { ndbrequire(data < dataStart + len); @@ -965,11 +980,8 @@ Restore::parse_record(Signal* signal, FilePtr file_ptr, memcpy(attrData, data, 4*c.m_size); attrData += c.m_size; data += c.m_size; - //if (file_ptr.p->m_table_id >= 2) ndbout << "1: " << c.m_id << " " << c.m_size << " col=" << column_no << endl; } - column_no++; - if(c.m_flags & Column::COL_DISK) disk= true; } @@ -985,10 +997,9 @@ Restore::parse_record(Signal* signal, FilePtr file_ptr, _align[1] = *it.data; Uint32 sz32 = (sz + 3) >> 2; - + ndbassert(c.m_flags & (Column::COL_VAR | Column::COL_NULL)); if(c.m_flags & Column::COL_KEY) { - assert(! 
c.m_nulloffset && c.m_flags & Column::COL_VAR); memcpy(keyData, data, 4 * sz32); keyData += sz32; } @@ -998,13 +1009,12 @@ Restore::parse_record(Signal* signal, FilePtr file_ptr, attrData += sz32; data += sz32; - //if (file_ptr.p->m_table_id >= 2) ndbout << "2: " << c.m_id << " " << sz << endl; } ndbrequire(data == dataStart + len - 1); ndbrequire(disk == false); // Not supported... - + ndbrequire(rowid == true); Uint32 keyLen = keyData - key_start; Uint32 attrLen = attrData - attr_start; LqhKeyReq * req = (LqhKeyReq *)signal->getDataPtrSend(); @@ -1029,7 +1039,6 @@ Restore::parse_record(Signal* signal, FilePtr file_ptr, tmp= 0; LqhKeyReq::setKeyLen(tmp, keyLen); LqhKeyReq::setLastReplicaNo(tmp, 0); - LqhKeyReq::setLockType(tmp, ZINSERT); /* ---------------------------------------------------------------------- */ // Indicate Application Reference is present in bit 15 /* ---------------------------------------------------------------------- */ @@ -1040,7 +1049,8 @@ Restore::parse_record(Signal* signal, FilePtr file_ptr, LqhKeyReq::setSameClientAndTcFlag(tmp, 0); LqhKeyReq::setAIInLqhKeyReq(tmp, 0); LqhKeyReq::setNoDiskFlag(tmp, disk ? 0 : 1); - //LqhKeyReq::setExecuteDirectFlag(tmp, 1); + LqhKeyReq::setRowidFlag(tmp, 1); + LqhKeyReq::setGCIFlag(tmp, gci); req->clientConnectPtr = file_ptr.i; req->hashValue = hashValue; req->requestInfo = tmp; @@ -1053,10 +1063,15 @@ Restore::parse_record(Signal* signal, FilePtr file_ptr, req->transId2 = 0; req->scanInfo = 0; memcpy(req->variableData, key_start, 16); - + Uint32 pos = keyLen > 4 ? 4 : keyLen; + req->variableData[pos++] = rowid_val.m_page_no; + req->variableData[pos++] = rowid_val.m_page_idx; + if (gci) + req->variableData[pos++] = (Uint32)gci_val; file_ptr.p->m_outstanding_operations++; - EXECUTE_DIRECT(DBLQH, GSN_LQHKEYREQ, signal, 11+(keyLen > 4 ? 
4 : keyLen)); - + EXECUTE_DIRECT(DBLQH, GSN_LQHKEYREQ, signal, + LqhKeyReq::FixedSignalLength+pos); + if(keyLen > 4) { c_lqh->receive_keyinfo(signal, @@ -1105,10 +1120,13 @@ Restore::reorder_key(const KeyDescriptor* desc, memcpy(dst, var, 4 * sz); var += sz; break; + default: + ndbrequire(false); + sz = 0; } dst += sz; } - assert((dst - Tmp) == len); + ndbassert((dst - Tmp) == len); memcpy(data, Tmp, 4*len); } @@ -1201,9 +1219,9 @@ operator << (NdbOut& ndbout, const Restore::Column& col) { ndbout << "[ Col: id: " << col.m_id << " size: " << col.m_size - << " nulloffset: " << col.m_nulloffset << " key: " << (Uint32)(col.m_flags & Restore::Column::COL_KEY) << " variable: " << (Uint32)(col.m_flags & Restore::Column::COL_VAR) + << " null: " << (Uint32)(col.m_flags & Restore::Column::COL_NULL) << " disk: " << (Uint32)(col.m_flags & Restore::Column::COL_DISK) << "]"; diff --git a/storage/ndb/src/kernel/blocks/restore.hpp b/storage/ndb/src/kernel/blocks/restore.hpp index 12d093b4593..e1af7ddb163 100644 --- a/storage/ndb/src/kernel/blocks/restore.hpp +++ b/storage/ndb/src/kernel/blocks/restore.hpp @@ -60,14 +60,15 @@ public: { Uint16 m_id; Uint16 m_size; - Uint16 m_nulloffset; // 0 = not nullable + Uint16 m_unused; Uint16 m_flags; enum Flags { COL_KEY = 0x1, COL_VAR = 0x2, - COL_DISK = 0x4 + COL_DISK = 0x4, + COL_NULL = 0x8 }; }; private: @@ -98,7 +99,6 @@ private: Uint32 m_table_version; Uint32 m_fragment_id; List::Head m_columns; - Uint32 m_null_bitmask_size; Uint32 m_current_page_ptr_i; Uint32 m_current_page_pos; diff --git a/storage/ndb/src/kernel/blocks/suma/Suma.cpp b/storage/ndb/src/kernel/blocks/suma/Suma.cpp index b2f2cbe2a09..7cc712162e3 100644 --- a/storage/ndb/src/kernel/blocks/suma/Suma.cpp +++ b/storage/ndb/src/kernel/blocks/suma/Suma.cpp @@ -14,6 +14,7 @@ along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ +#include <my_config.h> #include "Suma.hpp" #include <ndb_version.h> @@ 
-3149,7 +3150,8 @@ Suma::execSUB_GCP_COMPLETE_REP(Signal* signal) Page_pos pos= bucket->m_buffer_head; ndbrequire(pos.m_max_gci < gci); - Buffer_page* page= (Buffer_page*)(m_tup->cpage+pos.m_page_id); + Buffer_page* page= (Buffer_page*) + m_tup->c_page_pool.getPtr(pos.m_page_id); ndbout_c("takeover %d", pos.m_page_id); page->m_max_gci = pos.m_max_gci; page->m_words_used = pos.m_page_pos; @@ -4091,7 +4093,7 @@ Suma::get_buffer_ptr(Signal* signal, Uint32 buck, Uint32 gci, Uint32 sz) Bucket* bucket= c_buckets+buck; Page_pos pos= bucket->m_buffer_head; - Buffer_page* page= (Buffer_page*)(m_tup->cpage+pos.m_page_id); + Buffer_page* page= (Buffer_page*)m_tup->c_page_pool.getPtr(pos.m_page_id); Uint32* ptr= page->m_data + pos.m_page_pos; const bool same_gci = (gci == pos.m_last_gci) && (!ERROR_INSERTED(13022)); @@ -4150,7 +4152,7 @@ loop: pos.m_page_pos = sz; pos.m_last_gci = gci; - page= (Buffer_page*)(m_tup->cpage+pos.m_page_id); + page= (Buffer_page*)m_tup->c_page_pool.getPtr(pos.m_page_id); page->m_next_page= RNIL; ptr= page->m_data; goto loop; // @@ -4181,7 +4183,7 @@ Suma::out_of_buffer_release(Signal* signal, Uint32 buck) if(tail != RNIL) { - Buffer_page* page= (Buffer_page*)(m_tup->cpage+tail); + Buffer_page* page= (Buffer_page*)m_tup->c_page_pool.getPtr(tail); bucket->m_buffer_tail = page->m_next_page; free_page(tail, page); signal->theData[0] = SumaContinueB::OUT_OF_BUFFER_RELEASE; @@ -4225,8 +4227,8 @@ loop: Uint32 ref= m_first_free_page; if(likely(ref != RNIL)) { - m_first_free_page = ((Buffer_page*)m_tup->cpage+ref)->m_next_page; - Uint32 chunk = ((Buffer_page*)m_tup->cpage+ref)->m_page_chunk_ptr_i; + m_first_free_page = ((Buffer_page*)m_tup->c_page_pool.getPtr(ref))->m_next_page; + Uint32 chunk = ((Buffer_page*)m_tup->c_page_pool.getPtr(ref))->m_page_chunk_ptr_i; c_page_chunk_pool.getPtr(ptr, chunk); ndbassert(ptr.p->m_free); ptr.p->m_free--; @@ -4249,7 +4251,7 @@ loop: Buffer_page* page; for(Uint32 i = 0; i<count; i++) { - page = 
(Buffer_page*)(m_tup->cpage+ref); + page = (Buffer_page*)m_tup->c_page_pool.getPtr(ref); page->m_page_state= SUMA_SEQUENCE; page->m_page_chunk_ptr_i = ptr.i; page->m_next_page = ++ref; @@ -4313,7 +4315,7 @@ Suma::release_gci(Signal* signal, Uint32 buck, Uint32 gci) else { jam(); - Buffer_page* page= (Buffer_page*)(m_tup->cpage+tail); + Buffer_page* page= (Buffer_page*)m_tup->c_page_pool.getPtr(tail); Uint32 max_gci = page->m_max_gci; Uint32 next_page = page->m_next_page; @@ -4406,7 +4408,7 @@ Suma::resend_bucket(Signal* signal, Uint32 buck, Uint32 min_gci, Bucket* bucket= c_buckets+buck; Uint32 tail= bucket->m_buffer_tail; - Buffer_page* page= (Buffer_page*)(m_tup->cpage+tail); + Buffer_page* page= (Buffer_page*)m_tup->c_page_pool.getPtr(tail); Uint32 max_gci = page->m_max_gci; Uint32 next_page = page->m_next_page; Uint32 *ptr = page->m_data + pos; diff --git a/storage/ndb/src/kernel/blocks/tsman.cpp b/storage/ndb/src/kernel/blocks/tsman.cpp index 324f909d78b..2bb08010abe 100644 --- a/storage/ndb/src/kernel/blocks/tsman.cpp +++ b/storage/ndb/src/kernel/blocks/tsman.cpp @@ -390,7 +390,6 @@ Tsman::execDROP_FILEGROUP_REQ(Signal* signal){ if (errorCode) { - ndbassert(false); DropFilegroupImplRef* ref = (DropFilegroupImplRef*)signal->getDataPtrSend(); ref->senderRef = reference(); @@ -1084,7 +1083,9 @@ Tsman::load_extent_page_callback(Signal* signal, Ptr<Tablespace> ts_ptr; m_tablespace_pool.getPtr(ts_ptr, ptr.p->m_tablespace_ptr_i); - if (!getNodeState().getSystemRestartInProgress()) + if (getNodeState().startLevel >= NodeState::SL_STARTED || + (getNodeState().getNodeRestartInProgress() && + getNodeState().starting.restartType == NodeState::ST_INITIAL_NODE_RESTART)) { LocalDLList<Datafile> free(m_file_pool, ts_ptr.p->m_free_files); LocalDLList<Datafile> meta(m_file_pool, ts_ptr.p->m_meta_files); @@ -1615,6 +1616,57 @@ Tsman::update_page_free_bits(Signal* signal, } int +Tsman::get_page_free_bits(Signal* signal, Local_key *key, unsigned* bits) +{ + jamEntry(); + + /** + 
* XXX make into subroutine + */ + Ptr<Datafile> file_ptr; + Datafile file_key; + file_key.m_file_no = key->m_file_no; + ndbrequire(m_file_hash.find(file_ptr, file_key)); + + Uint32 size = file_ptr.p->m_extent_size; + Uint32 data_off = file_ptr.p->m_online.m_offset_data_pages; + Uint32 eh_words = File_formats::Datafile::extent_header_words(size); + Uint32 per_page = File_formats::Datafile::EXTENT_PAGE_WORDS/eh_words; + Uint32 SZ= File_formats::Datafile::EXTENT_HEADER_BITMASK_BITS_PER_PAGE; + + Uint32 extent = (key->m_page_no - data_off) / size + per_page; + Uint32 page_no = extent / per_page; + Uint32 extent_no = extent % per_page; + + Page_cache_client::Request preq; + preq.m_page.m_page_no = page_no; + preq.m_page.m_file_no = key->m_file_no; + + /** + * Handling of unmapped extent header pages is not implemented + */ + int flags = 0; + int real_page_id; + if ((real_page_id = m_page_cache_client.get_page(signal, preq, flags)) > 0) + { + GlobalPage* ptr_p = m_page_cache_client.m_ptr.p; + + File_formats::Datafile::Extent_page* page = + (File_formats::Datafile::Extent_page*)ptr_p; + File_formats::Datafile::Extent_header* header = + page->get_header(extent_no, size); + + ndbrequire(header->m_table != RNIL); + + Uint32 page_no_in_extent = (key->m_page_no - data_off) % size; + *bits = header->get_free_bits(page_no_in_extent); + return 0; + } + + return AllocExtentReq::UnmappedExtentPageIsNotImplemented; +} + +int Tsman::unmap_page(Signal* signal, Local_key *key) { jamEntry(); @@ -2055,17 +2107,23 @@ void Tsman::execGET_TABINFOREQ(Signal* signal) if(reqType == GetTabInfoReq::RequestByName){ jam(); releaseSections(signal); - - sendGET_TABINFOREF(signal, req, GetTabInfoRef::TableNameTooLong); + + sendGET_TABINFOREF(signal, req, GetTabInfoRef::NoFetchByName); return; } DLHashTable<Datafile>::Iterator iter; ndbrequire(m_file_hash.first(iter)); + while(iter.curr.p->m_file_id != tableId && m_file_hash.next(iter)) ; - ndbrequire(iter.curr.p->m_file_id == tableId); - + + 
if(iter.curr.p->m_file_id != tableId) + { + sendGET_TABINFOREF(signal, req, GetTabInfoRef::InvalidTableId); + return; + } + const Ptr<Datafile> &file_ptr= iter.curr; jam(); @@ -2073,9 +2131,9 @@ void Tsman::execGET_TABINFOREQ(Signal* signal) Uint32 total_free_extents = file_ptr.p->m_online.m_data_pages; total_free_extents /= file_ptr.p->m_extent_size; total_free_extents -= file_ptr.p->m_online.m_used_extent_cnt; - + GetTabInfoConf *conf = (GetTabInfoConf *)&signal->theData[0]; - + conf->senderData= senderData; conf->tableId= tableId; conf->freeExtents= total_free_extents; diff --git a/storage/ndb/src/kernel/blocks/tsman.hpp b/storage/ndb/src/kernel/blocks/tsman.hpp index cbe5e356440..194178163e8 100644 --- a/storage/ndb/src/kernel/blocks/tsman.hpp +++ b/storage/ndb/src/kernel/blocks/tsman.hpp @@ -196,6 +196,7 @@ private: void create_file_ref(Signal*, Ptr<Tablespace>, Ptr<Datafile>, Uint32,Uint32,Uint32); int update_page_free_bits(Signal*, Local_key*, unsigned bits, Uint64 lsn); + int get_page_free_bits(Signal*, Local_key*, unsigned* bits); int unmap_page(Signal*, Local_key*); int restart_undo_page_free_bits(Signal*, Local_key*, unsigned, Uint64); @@ -267,6 +268,11 @@ public: int update_page_free_bits(Local_key*, unsigned bits, Uint64 lsn); /** + * Get page free bits + */ + int get_page_free_bits(Local_key*, unsigned* bits); + + /** * Update unlogged page free bit */ int unmap_page(Local_key*); @@ -354,6 +360,13 @@ Tablespace_client::update_page_free_bits(Local_key *key, inline int +Tablespace_client::get_page_free_bits(Local_key *key, unsigned* bits) +{ + return m_tsman->get_page_free_bits(m_signal, key, bits); +} + +inline +int Tablespace_client::unmap_page(Local_key *key) { return m_tsman->unmap_page(m_signal, key); diff --git a/storage/ndb/src/kernel/vm/ArrayPool.hpp b/storage/ndb/src/kernel/vm/ArrayPool.hpp index 9133bbf6d36..e4fd0c63058 100644 --- a/storage/ndb/src/kernel/vm/ArrayPool.hpp +++ b/storage/ndb/src/kernel/vm/ArrayPool.hpp @@ -42,7 +42,8 @@ public: 
* * Note, can currently only be called once */ - bool setSize(Uint32 noOfElements, bool align = false, bool exit_on_error = true); + bool setSize(Uint32 noOfElements, bool align = false, bool exit_on_error = true, bool guard = true); + bool set(T*, Uint32 cnt, bool align = false); inline Uint32 getNoOfFree() const { return noOfFree; @@ -202,7 +203,8 @@ ArrayPool<T>::~ArrayPool(){ theArray = 0; alloc_ptr = 0; #ifdef ARRAY_GUARD - delete []theAllocatedBitmask; + if (theAllocatedBitmask) + delete []theAllocatedBitmask; theAllocatedBitmask = 0; #endif } @@ -216,7 +218,8 @@ ArrayPool<T>::~ArrayPool(){ template <class T> inline bool -ArrayPool<T>::setSize(Uint32 noOfElements, bool align, bool exit_on_error){ +ArrayPool<T>::setSize(Uint32 noOfElements, + bool align, bool exit_on_error, bool guard){ if(size == 0){ if(noOfElements == 0) return true; @@ -257,9 +260,12 @@ ArrayPool<T>::setSize(Uint32 noOfElements, bool align, bool exit_on_error){ firstFree = 0; #ifdef ARRAY_GUARD - bitmaskSz = (noOfElements + 31) >> 5; - theAllocatedBitmask = new Uint32[bitmaskSz]; - BitmaskImpl::clear(bitmaskSz, theAllocatedBitmask); + if (guard) + { + bitmaskSz = (noOfElements + 31) >> 5; + theAllocatedBitmask = new Uint32[bitmaskSz]; + BitmaskImpl::clear(bitmaskSz, theAllocatedBitmask); + } #endif return true; @@ -270,21 +276,56 @@ ArrayPool<T>::setSize(Uint32 noOfElements, bool align, bool exit_on_error){ ErrorReporter::handleAssert("ArrayPool<T>::setSize called twice", __FILE__, __LINE__); return false; // not reached } + +template <class T> +inline +bool +ArrayPool<T>::set(T* ptr, Uint32 cnt, bool align){ + if (size == 0) + { + alloc_ptr = ptr; + if(align) + { + UintPtr p = (UintPtr)alloc_ptr; + UintPtr mod = p % sizeof(T); + if (mod) + { + p += sizeof(T) - mod; + cnt --; + } + theArray = (T *)p; + } + else + { + theArray = (T *)alloc_ptr; + } + + size = cnt; + noOfFree = 0; + return true; + } + ErrorReporter::handleAssert("ArrayPool<T>::set called twice", + __FILE__, __LINE__); + 
return false; // not reached +} template <class T> inline void ArrayPool<T>::getPtr(Ptr<T> & ptr){ Uint32 i = ptr.i; - if(i < size){ + if(likely (i < size)){ ptr.p = &theArray[i]; #ifdef ARRAY_GUARD - if(BitmaskImpl::get(bitmaskSz, theAllocatedBitmask, i)) - return; - /** - * Getting a non-seized element - */ - ErrorReporter::handleAssert("ArrayPool<T>::getPtr", __FILE__, __LINE__); + if (theAllocatedBitmask) + { + if(BitmaskImpl::get(bitmaskSz, theAllocatedBitmask, i)) + return; + /** + * Getting a non-seized element + */ + ErrorReporter::handleAssert("ArrayPool<T>::getPtr", __FILE__, __LINE__); + } #endif } else { ErrorReporter::handleAssert("ArrayPool<T>::getPtr", __FILE__, __LINE__); @@ -296,15 +337,18 @@ inline void ArrayPool<T>::getPtr(ConstPtr<T> & ptr) const { Uint32 i = ptr.i; - if(i < size){ + if(likely(i < size)){ ptr.p = &theArray[i]; #ifdef ARRAY_GUARD - if(BitmaskImpl::get(bitmaskSz, theAllocatedBitmask, i)) - return; - /** - * Getting a non-seized element - */ - ErrorReporter::handleAssert("ArrayPool<T>::getPtr", __FILE__, __LINE__); + if (theAllocatedBitmask) + { + if(BitmaskImpl::get(bitmaskSz, theAllocatedBitmask, i)) + return; + /** + * Getting a non-seized element + */ + ErrorReporter::handleAssert("ArrayPool<T>::getPtr", __FILE__, __LINE__); + } #endif } else { ErrorReporter::handleAssert("ArrayPool<T>::getPtr", __FILE__, __LINE__); @@ -316,15 +360,18 @@ inline void ArrayPool<T>::getPtr(Ptr<T> & ptr, Uint32 i){ ptr.i = i; - if(i < size){ + if(likely(i < size)){ ptr.p = &theArray[i]; #ifdef ARRAY_GUARD - if(BitmaskImpl::get(bitmaskSz, theAllocatedBitmask, i)) - return; - /** - * Getting a non-seized element - */ - ErrorReporter::handleAssert("ArrayPool<T>::getPtr", __FILE__, __LINE__); + if (theAllocatedBitmask) + { + if(BitmaskImpl::get(bitmaskSz, theAllocatedBitmask, i)) + return; + /** + * Getting a non-seized element + */ + ErrorReporter::handleAssert("ArrayPool<T>::getPtr", __FILE__, __LINE__); + } #endif } else { 
ErrorReporter::handleAssert("ArrayPool<T>::getPtr", __FILE__, __LINE__); @@ -336,15 +383,18 @@ inline void ArrayPool<T>::getPtr(ConstPtr<T> & ptr, Uint32 i) const { ptr.i = i; - if(i < size){ + if(likely(i < size)){ ptr.p = &theArray[i]; #ifdef ARRAY_GUARD - if(BitmaskImpl::get(bitmaskSz, theAllocatedBitmask, i)) - return; - /** - * Getting a non-seized element - */ - ErrorReporter::handleAssert("ArrayPool<T>::getPtr", __FILE__, __LINE__); + if (theAllocatedBitmask) + { + if(BitmaskImpl::get(bitmaskSz, theAllocatedBitmask, i)) + return; + /** + * Getting a non-seized element + */ + ErrorReporter::handleAssert("ArrayPool<T>::getPtr", __FILE__, __LINE__); + } #endif } else { ErrorReporter::handleAssert("ArrayPool<T>::getPtr", __FILE__, __LINE__); @@ -355,18 +405,20 @@ template <class T> inline T * ArrayPool<T>::getPtr(Uint32 i){ - if(i < size){ + if(likely(i < size)){ #ifdef ARRAY_GUARD - if(BitmaskImpl::get(bitmaskSz, theAllocatedBitmask, i)) - return &theArray[i]; - /** - * Getting a non-seized element - */ - ErrorReporter::handleAssert("ArrayPool<T>::getPtr", __FILE__, __LINE__); - return 0; -#else - return &theArray[i]; + if (theAllocatedBitmask) + { + if(BitmaskImpl::get(bitmaskSz, theAllocatedBitmask, i)) + return &theArray[i]; + /** + * Getting a non-seized element + */ + ErrorReporter::handleAssert("ArrayPool<T>::getPtr", __FILE__, __LINE__); + return 0; + } #endif + return &theArray[i]; } else { ErrorReporter::handleAssert("ArrayPool<T>::getPtr", __FILE__, __LINE__); return 0; @@ -377,18 +429,20 @@ template <class T> inline const T * ArrayPool<T>::getConstPtr(Uint32 i) const { - if(i < size){ + if(likely(i < size)){ #ifdef ARRAY_GUARD - if(BitmaskImpl::get(bitmaskSz, theAllocatedBitmask, i)) - return &theArray[i]; - /** - * Getting a non-seized element - */ - ErrorReporter::handleAssert("ArrayPool<T>::getPtr", __FILE__, __LINE__); - return 0; -#else - return &theArray[i]; + if (theAllocatedBitmask) + { + if(BitmaskImpl::get(bitmaskSz, theAllocatedBitmask, 
i)) + return &theArray[i]; + /** + * Getting a non-seized element + */ + ErrorReporter::handleAssert("ArrayPool<T>::getPtr", __FILE__, __LINE__); + return 0; + } #endif + return &theArray[i]; } else { ErrorReporter::handleAssert("ArrayPool<T>::getPtr", __FILE__, __LINE__); return 0; @@ -400,15 +454,18 @@ inline void ArrayPool<T>::getPtr(Ptr<T> & ptr, bool CrashOnBoundaryError){ Uint32 i = ptr.i; - if(i < size){ + if(likely(i < size)){ ptr.p = &theArray[i]; #ifdef ARRAY_GUARD - if(BitmaskImpl::get(bitmaskSz, theAllocatedBitmask, i)) - return; - /** - * Getting a non-seized element - */ - ErrorReporter::handleAssert("ArrayPool<T>::getPtr", __FILE__, __LINE__); + if (theAllocatedBitmask) + { + if(BitmaskImpl::get(bitmaskSz, theAllocatedBitmask, i)) + return; + /** + * Getting a non-seized element + */ + ErrorReporter::handleAssert("ArrayPool<T>::getPtr", __FILE__, __LINE__); + } #endif } else { ptr.i = RNIL; @@ -420,15 +477,18 @@ inline void ArrayPool<T>::getPtr(ConstPtr<T> & ptr, bool CrashOnBoundaryError) const { Uint32 i = ptr.i; - if(i < size){ + if(likely(i < size)){ ptr.p = &theArray[i]; #ifdef ARRAY_GUARD - if(BitmaskImpl::get(bitmaskSz, theAllocatedBitmask, i)) - return; - /** - * Getting a non-seized element - */ - ErrorReporter::handleAssert("ArrayPool<T>::getPtr", __FILE__, __LINE__); + if (theAllocatedBitmask) + { + if(BitmaskImpl::get(bitmaskSz, theAllocatedBitmask, i)) + return; + /** + * Getting a non-seized element + */ + ErrorReporter::handleAssert("ArrayPool<T>::getPtr", __FILE__, __LINE__); + } #endif } else { ptr.i = RNIL; @@ -440,15 +500,18 @@ inline void ArrayPool<T>::getPtr(Ptr<T> & ptr, Uint32 i, bool CrashOnBoundaryError){ ptr.i = i; - if(i < size){ + if(likely(i < size)){ ptr.p = &theArray[i]; #ifdef ARRAY_GUARD - if(BitmaskImpl::get(bitmaskSz, theAllocatedBitmask, i)) - return; - /** - * Getting a non-seized element - */ - ErrorReporter::handleAssert("ArrayPool<T>::getPtr", __FILE__, __LINE__); + if (theAllocatedBitmask) + { + 
if(BitmaskImpl::get(bitmaskSz, theAllocatedBitmask, i)) + return; + /** + * Getting a non-seized element + */ + ErrorReporter::handleAssert("ArrayPool<T>::getPtr", __FILE__, __LINE__); + } #endif } else { ptr.i = RNIL; @@ -461,15 +524,18 @@ void ArrayPool<T>::getPtr(ConstPtr<T> & ptr, Uint32 i, bool CrashOnBoundaryError) const { ptr.i = i; - if(i < size){ + if(likely(i < size)){ ptr.p = &theArray[i]; #ifdef ARRAY_GUARD - if(BitmaskImpl::get(bitmaskSz, theAllocatedBitmask, i)) - return; - /** - * Getting a non-seized element - */ - ErrorReporter::handleAssert("ArrayPool<T>::getPtr", __FILE__, __LINE__); + if (theAllocatedBitmask) + { + if(BitmaskImpl::get(bitmaskSz, theAllocatedBitmask, i)) + return; + /** + * Getting a non-seized element + */ + ErrorReporter::handleAssert("ArrayPool<T>::getPtr", __FILE__, __LINE__); + } #endif } else { ptr.i = RNIL; @@ -480,18 +546,20 @@ template <class T> inline T * ArrayPool<T>::getPtr(Uint32 i, bool CrashOnBoundaryError){ - if(i < size){ + if(likely(i < size)){ #ifdef ARRAY_GUARD - if(BitmaskImpl::get(bitmaskSz, theAllocatedBitmask, i)) - return &theArray[i]; - /** - * Getting a non-seized element - */ - ErrorReporter::handleAssert("ArrayPool<T>::getPtr", __FILE__, __LINE__); - return 0; -#else - return &theArray[i]; + if (theAllocatedBitmask) + { + if(BitmaskImpl::get(bitmaskSz, theAllocatedBitmask, i)) + return &theArray[i]; + /** + * Getting a non-seized element + */ + ErrorReporter::handleAssert("ArrayPool<T>::getPtr", __FILE__, __LINE__); + return 0; + } #endif + return &theArray[i]; } else { return 0; } @@ -501,18 +569,20 @@ template <class T> inline const T * ArrayPool<T>::getConstPtr(Uint32 i, bool CrashOnBoundaryError) const { - if(i < size){ + if(likely(i < size)){ #ifdef ARRAY_GUARD - if(BitmaskImpl::get(bitmaskSz, theAllocatedBitmask, i)) - return &theArray[i]; - /** - * Getting a non-seized element - */ - ErrorReporter::handleAssert("ArrayPool<T>::getConstPtr", __FILE__,__LINE__); - return 0; -#else - return 
&theArray[i]; + if (theAllocatedBitmask) + { + if(BitmaskImpl::get(bitmaskSz, theAllocatedBitmask, i)) + return &theArray[i]; + /** + * Getting a non-seized element + */ + ErrorReporter::handleAssert("ArrayPool<T>::getConstPtr", __FILE__,__LINE__); + return 0; + } #endif + return &theArray[i]; } else { return 0; } @@ -534,21 +604,23 @@ ArrayPool<T>::seize(Ptr<T> & ptr){ ptr.i = ff; ptr.p = &theArray[ff]; #ifdef ARRAY_GUARD - if(!BitmaskImpl::get(bitmaskSz, theAllocatedBitmask, ff)){ - BitmaskImpl::set(bitmaskSz, theAllocatedBitmask, ff); - noOfFree--; - return true; - } else { - /** - * Seizing an already seized element - */ - ErrorReporter::handleAssert("ArrayPool<T>::seize", __FILE__, __LINE__); - return false; + if (theAllocatedBitmask) + { + if(!BitmaskImpl::get(bitmaskSz, theAllocatedBitmask, ff)){ + BitmaskImpl::set(bitmaskSz, theAllocatedBitmask, ff); + noOfFree--; + return true; + } else { + /** + * Seizing an already seized element + */ + ErrorReporter::handleAssert("ArrayPool<T>::seize", __FILE__, __LINE__); + return false; + } } -#else +#endif noOfFree--; return true; -#endif } ptr.i = RNIL; ptr.p = NULL; @@ -575,21 +647,23 @@ ArrayPool<T>::seizeId(Ptr<T> & ptr, Uint32 i){ ptr.i = ff; ptr.p = &theArray[ff]; #ifdef ARRAY_GUARD - if(!BitmaskImpl::get(bitmaskSz, theAllocatedBitmask, ff)){ - BitmaskImpl::set(bitmaskSz, theAllocatedBitmask, ff); - noOfFree--; - return true; - } else { - /** - * Seizing an already seized element - */ - ErrorReporter::handleAssert("ArrayPool<T>::seizeId", __FILE__, __LINE__); - return false; + if (theAllocatedBitmask) + { + if(!BitmaskImpl::get(bitmaskSz, theAllocatedBitmask, ff)){ + BitmaskImpl::set(bitmaskSz, theAllocatedBitmask, ff); + noOfFree--; + return true; + } else { + /** + * Seizing an already seized element + */ + ErrorReporter::handleAssert("ArrayPool<T>::seizeId", __FILE__, __LINE__); + return false; + } } -#else +#endif noOfFree--; return true; -#endif } ptr.i = RNIL; ptr.p = NULL; @@ -636,15 +710,18 @@ 
ArrayPool<T>::seizeN(Uint32 n){ noOfFree -= n; #ifdef ARRAY_GUARD - for(Uint32 j = base; j<curr; j++){ - if(!BitmaskImpl::get(bitmaskSz, theAllocatedBitmask, j)){ - BitmaskImpl::set(bitmaskSz, theAllocatedBitmask, j); - } else { - /** - * Seizing an already seized element - */ - ErrorReporter::handleAssert("ArrayPool<T>::seize", __FILE__, __LINE__); - return RNIL; + if (theAllocatedBitmask) + { + for(Uint32 j = base; j<curr; j++){ + if(!BitmaskImpl::get(bitmaskSz, theAllocatedBitmask, j)){ + BitmaskImpl::set(bitmaskSz, theAllocatedBitmask, j); + } else { + /** + * Seizing an already seized element + */ + ErrorReporter::handleAssert("ArrayPool<T>::seize", __FILE__, __LINE__); + return RNIL; + } } } #endif @@ -669,14 +746,17 @@ ArrayPool<T>::releaseN(Uint32 base, Uint32 n){ const Uint32 end = base + n; for(Uint32 i = base; i<end; i++){ #ifdef ARRAY_GUARD - if(BitmaskImpl::get(bitmaskSz, theAllocatedBitmask, i)){ - BitmaskImpl::clear(bitmaskSz, theAllocatedBitmask, i); - } else { - /** - * Relesing a already released element - */ - ErrorReporter::handleAssert("ArrayPool<T>::release", __FILE__, __LINE__); - return; + if (theAllocatedBitmask) + { + if(BitmaskImpl::get(bitmaskSz, theAllocatedBitmask, i)){ + BitmaskImpl::clear(bitmaskSz, theAllocatedBitmask, i); + } else { + /** + * Relesing a already released element + */ + ErrorReporter::handleAssert("ArrayPool<T>::release", __FILE__, __LINE__); + return; + } } #endif theArray[i].nextPool = i + 1; @@ -697,19 +777,22 @@ ArrayPool<T>::releaseList(Uint32 n, Uint32 first, Uint32 last){ noOfFree += n; #ifdef ARRAY_GUARD - Uint32 tmp = first; - for(Uint32 i = 0; i<n; i++){ - if(BitmaskImpl::get(bitmaskSz, theAllocatedBitmask, tmp)){ - BitmaskImpl::clear(bitmaskSz, theAllocatedBitmask, tmp); - } else { - /** - * Relesing a already released element - */ - ErrorReporter::handleAssert("ArrayPool<T>::releaseList", - __FILE__, __LINE__); - return; + if (theAllocatedBitmask) + { + Uint32 tmp = first; + for(Uint32 i = 0; i<n; i++){ + 
if(BitmaskImpl::get(bitmaskSz, theAllocatedBitmask, tmp)){ + BitmaskImpl::clear(bitmaskSz, theAllocatedBitmask, tmp); + } else { + /** + * Relesing a already released element + */ + ErrorReporter::handleAssert("ArrayPool<T>::releaseList", + __FILE__, __LINE__); + return; + } + tmp = theArray[tmp].nextPool; } - tmp = theArray[tmp].nextPool; } #endif return; @@ -725,21 +808,24 @@ inline void ArrayPool<T>::release(Uint32 _i){ const Uint32 i = _i; - if(i < size){ + if(likely(i < size)){ Uint32 ff = firstFree; theArray[i].nextPool = ff; firstFree = i; #ifdef ARRAY_GUARD - if(BitmaskImpl::get(bitmaskSz, theAllocatedBitmask, i)){ - BitmaskImpl::clear(bitmaskSz, theAllocatedBitmask, i); - noOfFree++; - return; + if (theAllocatedBitmask) + { + if(BitmaskImpl::get(bitmaskSz, theAllocatedBitmask, i)){ + BitmaskImpl::clear(bitmaskSz, theAllocatedBitmask, i); + noOfFree++; + return; + } + /** + * Relesing a already released element + */ + ErrorReporter::handleAssert("ArrayPool<T>::release", __FILE__, __LINE__); } - /** - * Relesing a already released element - */ - ErrorReporter::handleAssert("ArrayPool<T>::release", __FILE__, __LINE__); #endif noOfFree++; return; @@ -755,22 +841,25 @@ inline void ArrayPool<T>::release(Ptr<T> & ptr){ Uint32 i = ptr.i; - if(i < size){ + if(likely(i < size)){ Uint32 ff = firstFree; theArray[i].nextPool = ff; firstFree = i; #ifdef ARRAY_GUARD - if(BitmaskImpl::get(bitmaskSz, theAllocatedBitmask, i)){ - BitmaskImpl::clear(bitmaskSz, theAllocatedBitmask, i); - //assert(noOfFree() == noOfFree2()); - noOfFree++; - return; + if (theAllocatedBitmask) + { + if(BitmaskImpl::get(bitmaskSz, theAllocatedBitmask, i)){ + BitmaskImpl::clear(bitmaskSz, theAllocatedBitmask, i); + //assert(noOfFree() == noOfFree2()); + noOfFree++; + return; + } + /** + * Relesing a already released element + */ + ErrorReporter::handleAssert("ArrayPool<T>::release", __FILE__, __LINE__); } - /** - * Relesing a already released element - */ - 
ErrorReporter::handleAssert("ArrayPool<T>::release", __FILE__, __LINE__); #endif noOfFree++; return; @@ -798,7 +887,7 @@ inline void UnsafeArrayPool<T>::getPtrForce(Ptr<T> & ptr){ Uint32 i = ptr.i; - if(i < this->size){ + if(likely(i < this->size)){ ptr.p = &this->theArray[i]; } else { ErrorReporter::handleAssert("UnsafeArrayPool<T>::getPtr", @@ -811,7 +900,7 @@ inline void UnsafeArrayPool<T>::getPtrForce(ConstPtr<T> & ptr) const{ Uint32 i = ptr.i; - if(i < this->size){ + if(likely(i < this->size)){ ptr.p = &this->theArray[i]; } else { ErrorReporter::handleAssert("UnsafeArrayPool<T>::getPtr", @@ -823,7 +912,7 @@ template <class T> inline T * UnsafeArrayPool<T>::getPtrForce(Uint32 i){ - if(i < this->size){ + if(likely(i < this->size)){ return &this->theArray[i]; } else { ErrorReporter::handleAssert("UnsafeArrayPool<T>::getPtr", @@ -836,7 +925,7 @@ template <class T> inline const T * UnsafeArrayPool<T>::getConstPtrForce(Uint32 i) const { - if(i < this->size){ + if(likely(i < this->size)){ return &this->theArray[i]; } else { ErrorReporter::handleAssert("UnsafeArrayPool<T>::getPtr", @@ -850,7 +939,7 @@ inline void UnsafeArrayPool<T>::getPtrForce(Ptr<T> & ptr, Uint32 i){ ptr.i = i; - if(i < this->size){ + if(likely(i < this->size)){ ptr.p = &this->theArray[i]; return ; } else { @@ -864,7 +953,7 @@ inline void UnsafeArrayPool<T>::getPtrForce(ConstPtr<T> & ptr, Uint32 i) const{ ptr.i = i; - if(i < this->size){ + if(likely(i < this->size)){ ptr.p = &this->theArray[i]; return ; } else { diff --git a/storage/ndb/src/kernel/vm/DLList.hpp b/storage/ndb/src/kernel/vm/DLList.hpp index 337f9388f74..e3e7332d35c 100644 --- a/storage/ndb/src/kernel/vm/DLList.hpp +++ b/storage/ndb/src/kernel/vm/DLList.hpp @@ -91,6 +91,12 @@ public: * @NOTE MUST be seized from correct pool */ void add(Ptr<T> &); + + /** + * Add a list to list + * @NOTE all elements _must_ be correctly initilized correctly wrt next/prev + */ + void add(Uint32 first, Ptr<T> & last); /** * Remove object from list @@ 
-98,6 +104,13 @@ public: * @NOTE Does not return it to pool */ void remove(Ptr<T> &); + + /** + * Remove object from list + * + * @NOTE Does not return it to pool + */ + void remove(T*); /** * Update i & p value according to <b>i</b> @@ -256,19 +269,42 @@ DLList<T,U>::add(Ptr<T> & p){ template <class T, class U> inline void +DLList<T,U>::add(Uint32 first, Ptr<T> & lastPtr) +{ + Uint32 ff = head.firstItem; + + head.firstItem = first; + lastPtr.p->U::nextList = ff; + + if(ff != RNIL){ + T * t2 = thePool.getPtr(ff); + t2->U::prevList = lastPtr.i; + } +} + +template <class T, class U> +inline +void DLList<T,U>::remove(Ptr<T> & p){ - T * t = p.p; + remove(p.p); +} + +template <class T, class U> +inline +void +DLList<T,U>::remove(T * p){ + T * t = p; Uint32 ni = t->U::nextList; Uint32 pi = t->U::prevList; if(ni != RNIL){ - T * t = thePool.getPtr(ni); - t->U::prevList = pi; + T * tn = thePool.getPtr(ni); + tn->U::prevList = pi; } if(pi != RNIL){ - T * t = thePool.getPtr(pi); - t->U::nextList = ni; + T * tp = thePool.getPtr(pi); + tp->U::nextList = ni; } else { head.firstItem = ni; } diff --git a/storage/ndb/src/kernel/vm/Makefile.am b/storage/ndb/src/kernel/vm/Makefile.am index 3f3230b0694..f2bef8ba1f7 100644 --- a/storage/ndb/src/kernel/vm/Makefile.am +++ b/storage/ndb/src/kernel/vm/Makefile.am @@ -20,7 +20,7 @@ libkernel_a_SOURCES = \ Mutex.cpp SafeCounter.cpp \ Rope.cpp \ SuperPool.cpp \ - ndbd_malloc.cpp + ndbd_malloc.cpp ndbd_malloc_impl.cpp INCLUDES_LOC = -I$(top_srcdir)/storage/ndb/src/mgmapi @@ -43,3 +43,12 @@ libkernel.dsp: Makefile \ @$(top_srcdir)/storage/ndb/config/win-includes $@ $(INCLUDES) @$(top_srcdir)/storage/ndb/config/win-sources $@ $(libkernel_a_SOURCES) @$(top_srcdir)/storage/ndb/config/win-libraries $@ LIB $(LDADD) + +EXTRA_PROGRAMS = ndbd_malloc_impl_test +ndbd_malloc_impl_test_CXXFLAGS = -DUNIT_TEST +ndbd_malloc_impl_test_SOURCES = ndbd_malloc_impl.cpp +ndbd_malloc_impl_test_LDFLAGS = @ndb_bin_am_ldflags@ \ + 
$(top_builddir)/storage/ndb/src/libndbclient.la \ + $(top_builddir)/mysys/libmysys.a \ + $(top_builddir)/dbug/libdbug.a \ + $(top_builddir)/strings/libmystrings.a diff --git a/storage/ndb/src/kernel/vm/SLList.hpp b/storage/ndb/src/kernel/vm/SLList.hpp index 9758f9a66c4..7d724a18547 100644 --- a/storage/ndb/src/kernel/vm/SLList.hpp +++ b/storage/ndb/src/kernel/vm/SLList.hpp @@ -120,6 +120,19 @@ public: head.firstItem = p.i; } + /** + * Add a list to list + * @NOTE all elements _must_ be correctly initilized correctly wrt next/prev + */ + void add(Uint32 first, Ptr<T> & last); + + /** + * Remove object from list + * + * @NOTE Does not return it to pool + */ + bool remove_front(Ptr<T> &); + Uint32 noOfElements() const { Uint32 c = 0; Uint32 i = head.firstItem; @@ -248,6 +261,28 @@ SLList<T,U>::remove(){ template <class T, class U> inline +bool +SLList<T,U>::remove_front(Ptr<T> & p){ + p.i = head.firstItem; + if (p.i != RNIL) + { + p.p = thePool.getPtr(p.i); + head.firstItem = p.p->U::nextList; + return true; + } + return false; +} + +template <class T, class U> +inline +void +SLList<T,U>::add(Uint32 first, Ptr<T> & last){ + last.p->U::nextList = head.firstItem; + head.firstItem = first; +} + +template <class T, class U> +inline void SLList<T,U>::release(){ while(head.firstItem != RNIL){ diff --git a/storage/ndb/src/kernel/vm/mem.txt b/storage/ndb/src/kernel/vm/mem.txt new file mode 100644 index 00000000000..0ab0ff2a5a9 --- /dev/null +++ b/storage/ndb/src/kernel/vm/mem.txt @@ -0,0 +1,34 @@ +Structure + +Ndbd_mem_allocator +Handles allocation on 32k blocks +Provides Buddy allocation of max 8G objects + +SuperPool 8k (chunk size 64k) +GroupPool - Metadata + DICT::RopePool + +GroupPool - Disk operations + TUP::Page request + PGMAN::Page request + LGMAN::Log waiter + LGMAN::Log syncer + +GroupPool - Disk space + Tsman::Datafile + Tsman::Tablespace + Lgman::Undofile + Lgman::Logfilegroup + TUP::Extent alloc info + +SuperPool 32k + TUP Undo buffer + SUMA GCI buffer + 
Pgman::PageEntry + +Direct Ndbd_mem_allocator + Pgman::GlobalPage - + TUP PageMan + Restore + Backup + Lgman::Logbuffer diff --git a/storage/ndb/src/kernel/vm/ndbd_malloc_impl.cpp b/storage/ndb/src/kernel/vm/ndbd_malloc_impl.cpp new file mode 100644 index 00000000000..ca3e072f322 --- /dev/null +++ b/storage/ndb/src/kernel/vm/ndbd_malloc_impl.cpp @@ -0,0 +1,410 @@ +/* Copyright (C) 2003 MySQL AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + + + +#include "ndbd_malloc_impl.hpp" +#include <ndb_global.h> + +Uint32 +Ndbd_mem_manager::log2(Uint32 input) +{ + input = input | (input >> 8); + input = input | (input >> 4); + input = input | (input >> 2); + input = input | (input >> 1); + Uint32 output = (input & 0x5555) + ((input >> 1) & 0x5555); + output = (output & 0x3333) + ((output >> 2) & 0x3333); + output = output + (output >> 4); + output = (output & 0xf) + ((output >> 8) & 0xf); + return output; +} + +Ndbd_mem_manager::Ndbd_mem_manager(Uint32 default_grow) +{ + m_grow_size = default_grow; + bzero(m_buddy_lists, sizeof(m_buddy_lists)); + m_base = 0; + m_base_page = 0; + + m_pages_alloc = 0; + m_pages_used = 0; + + if (sizeof(Free_page_data) != (4 * (1 << FPD_2LOG))) + { + abort(); + } +} + +bool +Ndbd_mem_manager::init(Uint32 pages) +{ + assert(m_base == 0); + assert(m_base_page == 0); + assert(m_pages_alloc == 0); + pages 
= pages ? pages : m_grow_size; + + m_base = malloc((2 + pages) * sizeof(Alloc_page)); + UintPtr ptr = (UintPtr)m_base; + UintPtr rem = ptr % sizeof(Alloc_page); + if (rem) + { + ptr += sizeof(Alloc_page) - rem; + } + else + { + pages++; + } + m_base_page = (Alloc_page*)ptr; + m_pages_alloc += pages; + m_pages_used += pages; + + Uint32 bmp = (pages + (1 << BPP_2LOG) - 1) >> BPP_2LOG; + for(Uint32 i = 0; i < bmp; i++) + { + Uint32 start = i * (1 << BPP_2LOG); + Uint32 end = start + (1 << BPP_2LOG); + end = end > m_pages_alloc ? m_pages_alloc : end - 1; + Alloc_page *ptr = m_base_page + start; + BitmaskImpl::clear(BITMAP_WORDS, ptr->m_data); + + release(start+1, end - 1 - start); + } +} + +void +Ndbd_mem_manager::release(Uint32 start, Uint32 cnt) +{ + assert(m_pages_used >= cnt); + assert(start); + m_pages_used -= cnt; + + set(start, start+cnt-1); + + release_impl(start, cnt); +} + +void +Ndbd_mem_manager::release_impl(Uint32 start, Uint32 cnt) +{ + assert(start); + + Uint32 test = check(start-1, start+cnt); + if (test & 1) + { + Free_page_data *fd = get_free_page_data(m_base_page + start - 1, + start - 1); + Uint32 sz = fd->m_size; + Uint32 left = start - sz; + remove_free_list(left, fd->m_list); + cnt += sz; + start = left; + } + + Uint32 right = start + cnt; + if (test & 2) + { + Free_page_data *fd = get_free_page_data(m_base_page+right, right); + Uint32 sz = fd->m_size; + remove_free_list(right, fd->m_list); + cnt += sz; + } + + insert_free_list(start, cnt); +} + +void +Ndbd_mem_manager::alloc(Uint32* ret, Uint32 *pages, Uint32 min) +{ + Uint32 start, i; + Uint32 cnt = * pages; + Uint32 list = log2(cnt - 1); + + assert(cnt); + assert(list <= 16); + + for (i = list; i < 16; i++) + { + if ((start = m_buddy_lists[i])) + { +/* ---------------------------------------------------------------- */ +/* PROPER AMOUNT OF PAGES WERE FOUND. NOW SPLIT THE FOUND */ +/* AREA AND RETURN THE PART NOT NEEDED. 
*/ +/* ---------------------------------------------------------------- */ + + Uint32 sz = remove_free_list(start, i); + Uint32 extra = sz - cnt; + assert(sz >= cnt); + if (extra) + { + insert_free_list(start + cnt, extra); + clear_and_set(start, start+cnt-1); + } + else + { + clear(start, start+cnt-1); + } + * ret = start; + m_pages_used += cnt; + assert(m_pages_used <= m_pages_alloc); + return; + } + } + + /** + * Could not find in quaranteed list... + * search in other lists... + */ + + Uint32 min_list = log2(min - 1); + assert(list >= min_list); + for (i = list - 1; i >= min_list; i--) + { + if ((start = m_buddy_lists[i])) + { + Uint32 sz = remove_free_list(start, i); + Uint32 extra = sz - cnt; + if (sz > cnt) + { + insert_free_list(start + cnt, extra); + sz -= extra; + clear_and_set(start, start+sz-1); + } + else + { + clear(start, start+sz-1); + } + + * ret = start; + * pages = sz; + m_pages_used += sz; + assert(m_pages_used <= m_pages_alloc); + return; + } + } + * pages = 0; +} + +void +Ndbd_mem_manager::insert_free_list(Uint32 start, Uint32 size) +{ + Uint32 list = log2(size) - 1; + Uint32 last = start + size - 1; + + Uint32 head = m_buddy_lists[list]; + Free_page_data* fd_first = get_free_page_data(m_base_page+start, + start); + fd_first->m_list = list; + fd_first->m_next = head; + fd_first->m_prev = 0; + fd_first->m_size = size; + + Free_page_data* fd_last = get_free_page_data(m_base_page+last, last); + fd_last->m_list = list; + fd_last->m_next = head; + fd_last->m_prev = 0; + fd_last->m_size = size; + + if (head) + { + Free_page_data* fd = get_free_page_data(m_base_page+head, head); + assert(fd->m_prev == 0); + assert(fd->m_list == list); + fd->m_prev = start; + } + + m_buddy_lists[list] = start; +} + +Uint32 +Ndbd_mem_manager::remove_free_list(Uint32 start, Uint32 list) +{ + Free_page_data* fd = get_free_page_data(m_base_page+start, start); + Uint32 size = fd->m_size; + Uint32 next = fd->m_next; + Uint32 prev = fd->m_prev; + assert(fd->m_list == list); 
+ + if (prev) + { + assert(m_buddy_lists[list] != start); + fd = get_free_page_data(m_base_page+prev, prev); + assert(fd->m_next == start); + assert(fd->m_list == list); + fd->m_next = next; + } + else + { + assert(m_buddy_lists[list] == start); + m_buddy_lists[list] = next; + } + + if (next) + { + fd = get_free_page_data(m_base_page+next, next); + assert(fd->m_list == list); + assert(fd->m_prev == start); + fd->m_prev = prev; + } + + return size; +} + +Uint32 +Ndbd_mem_manager::get_no_allocated_pages() const +{ + return m_pages_alloc; +} + +Uint32 +Ndbd_mem_manager::get_no_used_pages() const +{ + return m_pages_used; +} + +Uint32 +Ndbd_mem_manager::get_no_free_pages() const +{ + return m_pages_alloc - m_pages_used; +} + + +void +Ndbd_mem_manager::dump() const +{ + for(Uint32 i = 0; i<16; i++) + { + printf(" list: %d - ", i); + Uint32 head = m_buddy_lists[i]; + while(head) + { + Free_page_data* fd = get_free_page_data(m_base_page+head, head); + printf("[ i: %d prev %d next %d list %d size %d ] ", + head, fd->m_prev, fd->m_next, fd->m_list, fd->m_size); + head = fd->m_next; + } + printf("EOL\n"); + } +} + +#ifdef UNIT_TEST + +#include <Vector.hpp> + +struct Chunk { + Uint32 pageId; + Uint32 pageCount; +}; + +int +main(void) +{ + printf("Startar modul test av Page Manager\n"); +#define DEBUG 0 + + Ndbd_mem_manager mem; + mem.init(32000); + Vector<Chunk> chunks; + const Uint32 LOOPS = 100000; + for(Uint32 i = 0; i<LOOPS; i++){ + //mem.dump(); + + // Case + Uint32 c = (rand() % 100); + const Uint32 free = mem.get_no_allocated_pages() - mem.get_no_used_pages(); + if (c < 60) + { + c = 0; + } + else if (c < 93) + { + c = 1; + } + else + { + c = 2; + } + + Uint32 alloc = 0; + if(free <= 1) + { + c = 0; + alloc = 1; + } + else + { + alloc = 1 + (rand() % (free - 1)); + } + + if(chunks.size() == 0 && c == 0) + { + c = 1 + rand() % 2; + } + + if(DEBUG) + printf("loop=%d ", i); + switch(c){ + case 0:{ // Release + const int ch = rand() % chunks.size(); + Chunk chunk = 
chunks[ch]; + chunks.erase(ch); + mem.release(chunk.pageId, chunk.pageCount); + if(DEBUG) + printf(" release %d %d\n", chunk.pageId, chunk.pageCount); + } + break; + case 2: { // Seize(n) - fail + alloc += free; + // Fall through + } + case 1: { // Seize(n) (success) + Chunk chunk; + chunk.pageCount = alloc; + mem.alloc(&chunk.pageId, &chunk.pageCount, 1); + if (DEBUG) + printf(" alloc %d -> %d %d", alloc, chunk.pageId, chunk.pageCount); + assert(chunk.pageCount <= alloc); + if(chunk.pageCount != 0){ + chunks.push_back(chunk); + if(chunk.pageCount != alloc) { + if (DEBUG) + printf(" - Tried to allocate %d - only allocated %d - free: %d", + alloc, chunk.pageCount, free); + } + } else { + if (DEBUG) + printf(" Failed to alloc %d pages with %d pages free", + alloc, free); + } + if (DEBUG) + printf("\n"); + if(alloc == 1 && free > 0) + assert(chunk.pageCount == alloc); + } + break; + } + } + if (!DEBUG) + while(chunks.size() > 0){ + Chunk chunk = chunks.back(); + mem.release(chunk.pageId, chunk.pageCount); + chunks.erase(chunks.size() - 1); + } +} + +template class Vector<Chunk>; + +#endif diff --git a/storage/ndb/src/kernel/vm/ndbd_malloc_impl.hpp b/storage/ndb/src/kernel/vm/ndbd_malloc_impl.hpp new file mode 100644 index 00000000000..e497f62b92a --- /dev/null +++ b/storage/ndb/src/kernel/vm/ndbd_malloc_impl.hpp @@ -0,0 +1,187 @@ +/* Copyright (C) 2003 MySQL AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#ifndef NDBD_MALLOC_IMPL_H +#define NDBD_MALLOC_IMPL_H + +#include <kernel_types.h> +#include <Bitmask.hpp> +#include <assert.h> + +/** + * 13 -> 8192 words -> 32768 bytes + * 18 -> 262144 words -> 1M + */ +#define BMW_2LOG 13 +#define BITMAP_WORDS (1 << BMW_2LOG) + +#define BPP_2LOG (BMW_2LOG + 5) +#define SPACE_PER_BMP_2LOG ((2 + BMW_2LOG) + BPP_2LOG) + +//#define BITMAP_WORDS GLOBAL_PAGE_SIZE_WORDS + +struct Alloc_page +{ + Uint32 m_data[BITMAP_WORDS]; +}; + +struct Free_page_data +{ + Uint32 m_list; + Uint32 m_next; + Uint32 m_prev; + Uint32 m_size; +}; + +#define FPD_2LOG 2 + +class Ndbd_mem_manager +{ +public: + Ndbd_mem_manager(Uint32 default_grow = 32); + + void alloc(Uint32* ret, Uint32 *pages, Uint32 min_requested); + void release(Uint32 start, Uint32 cnt); + + Uint32 get_no_allocated_pages() const; + Uint32 get_no_used_pages() const; + Uint32 get_no_free_pages() const; + + bool init(Uint32 pages = 0); + bool grow(Uint32 pages = 0); + + void dump() const ; +private: + + /** + * Compute 2log of size + * @note size = 0 -> 0 + * @note size > 65536 -> 16 + */ + static Uint32 log2(Uint32 size); + + /** + * Return pointer to free page data on page + */ + static Free_page_data* get_free_page_data(Alloc_page*, Uint32 idx); + + Uint32 m_pages_alloc; + Uint32 m_pages_used; + + Uint32 m_grow_size; + Uint32 m_buddy_lists[16]; + + void * m_base; + Alloc_page * m_base_page; + + void release_impl(Uint32 start, Uint32 cnt); + void insert_free_list(Uint32 start, Uint32 cnt); + Uint32 remove_free_list(Uint32 start, Uint32 list); + + void set(Uint32 first, Uint32 last); + void clear(Uint32 first, Uint32 last); + void clear_and_set(Uint32 first, Uint32 last); + Uint32 check(Uint32 first, Uint32 last); +}; + +inline +Free_page_data* 
+Ndbd_mem_manager::get_free_page_data(Alloc_page* ptr, Uint32 idx) +{ + assert(idx & ((1 << BPP_2LOG) - 1)); + assert((idx & ((1 << BPP_2LOG) - 1)) != ((1 << BPP_2LOG) - 1)); + + return (Free_page_data*) + (ptr->m_data + ((idx & ((BITMAP_WORDS >> FPD_2LOG) - 1)) << FPD_2LOG)); +} + +inline +void +Ndbd_mem_manager::set(Uint32 first, Uint32 last) +{ + Alloc_page * ptr = m_base_page; +#if ((SPACE_PER_BMP_2LOG < 32) && (SIZEOF_CHARP == 4)) || (SIZEOF_CHARP == 8) + Uint32 bmp = first & ~((1 << BPP_2LOG) - 1); + assert((first >> BPP_2LOG) == (last >> BPP_2LOG)); + assert(bmp < m_pages_alloc); + + first -= bmp; + last -= bmp; + ptr += bmp; +#endif + BitmaskImpl::set(BITMAP_WORDS, ptr->m_data, first); + BitmaskImpl::set(BITMAP_WORDS, ptr->m_data, last); +} + +inline +void +Ndbd_mem_manager::clear(Uint32 first, Uint32 last) +{ + Alloc_page * ptr = m_base_page; +#if ((SPACE_PER_BMP_2LOG < 32) && (SIZEOF_CHARP == 4)) || (SIZEOF_CHARP == 8) + Uint32 bmp = first & ~((1 << BPP_2LOG) - 1); + assert((first >> BPP_2LOG) == (last >> BPP_2LOG)); + assert(bmp < m_pages_alloc); + + first -= bmp; + last -= bmp; + ptr += bmp; +#endif + BitmaskImpl::clear(BITMAP_WORDS, ptr->m_data, first); + BitmaskImpl::clear(BITMAP_WORDS, ptr->m_data, last); +} + +inline +void +Ndbd_mem_manager::clear_and_set(Uint32 first, Uint32 last) +{ + Alloc_page * ptr = m_base_page; +#if ((SPACE_PER_BMP_2LOG < 32) && (SIZEOF_CHARP == 4)) || (SIZEOF_CHARP == 8) + Uint32 bmp = first & ~((1 << BPP_2LOG) - 1); + assert((first >> BPP_2LOG) == (last >> BPP_2LOG)); + assert(bmp < m_pages_alloc); + + first -= bmp; + last -= bmp; + ptr += bmp; +#endif + BitmaskImpl::clear(BITMAP_WORDS, ptr->m_data, first); + BitmaskImpl::clear(BITMAP_WORDS, ptr->m_data, last); + BitmaskImpl::set(BITMAP_WORDS, ptr->m_data, last+1); +} + +inline +Uint32 +Ndbd_mem_manager::check(Uint32 first, Uint32 last) +{ + Uint32 ret = 0; + Alloc_page * ptr = m_base_page; +#if ((SPACE_PER_BMP_2LOG < 32) && (SIZEOF_CHARP == 4)) || (SIZEOF_CHARP == 8) + 
Uint32 bmp = first & ~((1 << BPP_2LOG) - 1); + assert((first >> BPP_2LOG) == (last >> BPP_2LOG)); + assert(bmp < m_pages_alloc); + + first -= bmp; + last -= bmp; + ptr += bmp; +#endif + ret |= BitmaskImpl::get(BITMAP_WORDS, ptr->m_data, first) << 0; + ret |= BitmaskImpl::get(BITMAP_WORDS, ptr->m_data, last) << 1; + return ret; +} + + +#endif diff --git a/storage/ndb/src/ndbapi/NdbBlob.cpp b/storage/ndb/src/ndbapi/NdbBlob.cpp index edfdd9ec836..8d098a9f493 100644 --- a/storage/ndb/src/ndbapi/NdbBlob.cpp +++ b/storage/ndb/src/ndbapi/NdbBlob.cpp @@ -78,6 +78,8 @@ NdbBlob::getBlobTable(NdbTableImpl& bt, const NdbTableImpl* t, const NdbColumnIm must be hash based so that the kernel can handle it on its own. */ bt.m_primaryTableId = t->m_id; + bt.m_tablespace_id = t->m_tablespace_id; + bt.m_tablespace_version = t->m_tablespace_version; bt.m_ng.clear(); switch (t->getFragmentType()) { diff --git a/storage/ndb/src/ndbapi/NdbDictionary.cpp b/storage/ndb/src/ndbapi/NdbDictionary.cpp index b56bd778855..3c42e232846 100644 --- a/storage/ndb/src/ndbapi/NdbDictionary.cpp +++ b/storage/ndb/src/ndbapi/NdbDictionary.cpp @@ -466,6 +466,11 @@ NdbDictionary::Table::getObjectVersion() const { return m_impl.m_version; } +int +NdbDictionary::Table::getObjectId() const { + return m_impl.m_id; +} + bool NdbDictionary::Table::equal(const NdbDictionary::Table & col) const { return m_impl.equal(col.m_impl); @@ -497,6 +502,12 @@ NdbDictionary::Table::createTableInDb(Ndb* pNdb, bool equalOk) const { return pNdb->getDictionary()->createTable(* this); } +Uint32 +NdbDictionary::Table::getTablespaceId() const +{ + return m_impl.m_tablespace_id; +} + void NdbDictionary::Table::setTablespace(const char * name){ m_impl.m_tablespace_id = ~0; @@ -511,6 +522,27 @@ NdbDictionary::Table::setTablespace(const NdbDictionary::Tablespace & ts){ m_impl.m_tablespace_name.assign(ts.getName()); } +void +NdbDictionary::Table::setRowChecksumIndicator(bool val){ + m_impl.m_row_checksum = val; +} + +bool 
+NdbDictionary::Table::getRowChecksumIndicator() const { + return m_impl.m_row_checksum; +} + +void +NdbDictionary::Table::setRowGCIIndicator(bool val){ + m_impl.m_row_gci = val; +} + +bool +NdbDictionary::Table::getRowGCIIndicator() const { + return m_impl.m_row_gci; +} + + /***************************************************************** * Index facade */ @@ -644,6 +676,12 @@ NdbDictionary::Index::getObjectVersion() const { return m_impl.m_version; } +int +NdbDictionary::Index::getObjectId() const { + return m_impl.m_id; +} + + /***************************************************************** * Event facade */ @@ -765,6 +803,11 @@ NdbDictionary::Event::getObjectVersion() const return m_impl.m_version; } +int +NdbDictionary::Event::getObjectId() const { + return m_impl.m_id; +} + void NdbDictionary::Event::print() { m_impl.print(); @@ -783,6 +826,12 @@ NdbDictionary::Tablespace::Tablespace(NdbTablespaceImpl & impl) { } +NdbDictionary::Tablespace::Tablespace(const NdbDictionary::Tablespace & org) + : Object(org), m_impl(* new NdbTablespaceImpl(* this)) +{ + m_impl.assign(org.m_impl); +} + NdbDictionary::Tablespace::~Tablespace(){ NdbTablespaceImpl * tmp = &m_impl; if(this != tmp){ @@ -840,6 +889,11 @@ NdbDictionary::Tablespace::getDefaultLogfileGroup() const { return m_impl.m_logfile_group_name.c_str(); } +Uint32 +NdbDictionary::Tablespace::getDefaultLogfileGroupId() const { + return m_impl.m_logfile_group_id; +} + NdbDictionary::Object::Status NdbDictionary::Tablespace::getObjectStatus() const { return m_impl.m_status; @@ -850,6 +904,11 @@ NdbDictionary::Tablespace::getObjectVersion() const { return m_impl.m_version; } +int +NdbDictionary::Tablespace::getObjectId() const { + return m_impl.m_id; +} + /***************************************************************** * LogfileGroup facade */ @@ -863,6 +922,12 @@ NdbDictionary::LogfileGroup::LogfileGroup(NdbLogfileGroupImpl & impl) { } +NdbDictionary::LogfileGroup::LogfileGroup(const NdbDictionary::LogfileGroup & 
org) + : Object(org), m_impl(* new NdbLogfileGroupImpl(* this)) +{ + m_impl.assign(org.m_impl); +} + NdbDictionary::LogfileGroup::~LogfileGroup(){ NdbLogfileGroupImpl * tmp = &m_impl; if(this != tmp){ @@ -900,6 +965,10 @@ NdbDictionary::LogfileGroup::getAutoGrowSpecification() const { return m_impl.m_grow_spec; } +Uint64 NdbDictionary::LogfileGroup::getUndoFreeWords() const { + return m_impl.m_undo_free_words; +} + NdbDictionary::Object::Status NdbDictionary::LogfileGroup::getObjectStatus() const { return m_impl.m_status; @@ -910,6 +979,11 @@ NdbDictionary::LogfileGroup::getObjectVersion() const { return m_impl.m_version; } +int +NdbDictionary::LogfileGroup::getObjectId() const { + return m_impl.m_id; +} + /***************************************************************** * Datafile facade */ @@ -923,6 +997,12 @@ NdbDictionary::Datafile::Datafile(NdbDatafileImpl & impl) { } +NdbDictionary::Datafile::Datafile(const NdbDictionary::Datafile & org) + : Object(org), m_impl(* new NdbDatafileImpl(* this)) +{ + m_impl.assign(org.m_impl); +} + NdbDictionary::Datafile::~Datafile(){ NdbDatafileImpl * tmp = &m_impl; if(this != tmp){ @@ -974,6 +1054,11 @@ NdbDictionary::Datafile::getTablespace() const { return m_impl.m_filegroup_name.c_str(); } +Uint32 +NdbDictionary::Datafile::getTablespaceId() const { + return m_impl.m_filegroup_id; +} + NdbDictionary::Object::Status NdbDictionary::Datafile::getObjectStatus() const { return m_impl.m_status; @@ -984,6 +1069,11 @@ NdbDictionary::Datafile::getObjectVersion() const { return m_impl.m_version; } +int +NdbDictionary::Datafile::getObjectId() const { + return m_impl.m_id; +} + /***************************************************************** * Undofile facade */ @@ -997,6 +1087,12 @@ NdbDictionary::Undofile::Undofile(NdbUndofileImpl & impl) { } +NdbDictionary::Undofile::Undofile(const NdbDictionary::Undofile & org) + : Object(org), m_impl(* new NdbUndofileImpl(* this)) +{ + m_impl.assign(org.m_impl); +} + 
NdbDictionary::Undofile::~Undofile(){ NdbUndofileImpl * tmp = &m_impl; if(this != tmp){ @@ -1024,11 +1120,6 @@ NdbDictionary::Undofile::getSize() const { return m_impl.m_size; } -Uint64 -NdbDictionary::Undofile::getFree() const { - return m_impl.m_free; -} - void NdbDictionary::Undofile::setLogfileGroup(const char * logfileGroup){ m_impl.m_filegroup_id = ~0; @@ -1049,6 +1140,11 @@ NdbDictionary::Undofile::getLogfileGroup() const { return m_impl.m_filegroup_name.c_str(); } +Uint32 +NdbDictionary::Undofile::getLogfileGroupId() const { + return m_impl.m_filegroup_id; +} + NdbDictionary::Object::Status NdbDictionary::Undofile::getObjectStatus() const { return m_impl.m_status; @@ -1059,6 +1155,11 @@ NdbDictionary::Undofile::getObjectVersion() const { return m_impl.m_version; } +int +NdbDictionary::Undofile::getObjectId() const { + return m_impl.m_id; +} + /***************************************************************** * Dictionary facade */ @@ -1503,11 +1604,3 @@ NdbDictionary::Dictionary::getUndofile(Uint32 node, const char * path){ return tmp; } -const NdbDictionary::Column * NdbDictionary::Column::FRAGMENT = 0; -const NdbDictionary::Column * NdbDictionary::Column::FRAGMENT_MEMORY = 0; -const NdbDictionary::Column * NdbDictionary::Column::ROW_COUNT = 0; -const NdbDictionary::Column * NdbDictionary::Column::COMMIT_COUNT = 0; -const NdbDictionary::Column * NdbDictionary::Column::ROW_SIZE = 0; -const NdbDictionary::Column * NdbDictionary::Column::RANGE_NO = 0; -const NdbDictionary::Column * NdbDictionary::Column::DISK_REF = 0; -const NdbDictionary::Column * NdbDictionary::Column::RECORDS_IN_RANGE = 0; diff --git a/storage/ndb/src/ndbapi/NdbDictionaryImpl.cpp b/storage/ndb/src/ndbapi/NdbDictionaryImpl.cpp index 79780c62643..507fc3979c7 100644 --- a/storage/ndb/src/ndbapi/NdbDictionaryImpl.cpp +++ b/storage/ndb/src/ndbapi/NdbDictionaryImpl.cpp @@ -320,9 +320,21 @@ NdbColumnImpl::create_pseudo(const char * name){ col->m_impl.m_attrId = AttributeHeader::RECORDS_IN_RANGE; 
col->m_impl.m_attrSize = 4; col->m_impl.m_arraySize = 4; + } else if(!strcmp(name, "NDB$ROWID")){ + col->setType(NdbDictionary::Column::Bigunsigned); + col->m_impl.m_attrId = AttributeHeader::ROWID; + col->m_impl.m_attrSize = 4; + col->m_impl.m_arraySize = 2; + } else if(!strcmp(name, "NDB$ROW_GCI")){ + col->setType(NdbDictionary::Column::Bigunsigned); + col->m_impl.m_attrId = AttributeHeader::ROW_GCI; + col->m_impl.m_attrSize = 8; + col->m_impl.m_arraySize = 1; + col->m_impl.m_nullable = true; } else { abort(); } + col->m_impl.m_storageType = NDB_STORAGETYPE_MEMORY; return col; } @@ -378,6 +390,8 @@ NdbTableImpl::init(){ m_noOfBlobs= 0; m_replicaCount= 0; m_tablespace_id = ~0; + m_row_gci = true; + m_row_checksum = true; } bool @@ -891,6 +905,8 @@ NdbDictionaryImpl::~NdbDictionaryImpl() delete NdbDictionary::Column::RANGE_NO; delete NdbDictionary::Column::DISK_REF; delete NdbDictionary::Column::RECORDS_IN_RANGE; + delete NdbDictionary::Column::ROWID; + delete NdbDictionary::Column::ROW_GCI; NdbDictionary::Column::FRAGMENT= 0; NdbDictionary::Column::FRAGMENT_MEMORY= 0; NdbDictionary::Column::ROW_COUNT= 0; @@ -899,6 +915,8 @@ NdbDictionaryImpl::~NdbDictionaryImpl() NdbDictionary::Column::RANGE_NO= 0; NdbDictionary::Column::DISK_REF= 0; NdbDictionary::Column::RECORDS_IN_RANGE= 0; + NdbDictionary::Column::ROWID= 0; + NdbDictionary::Column::ROW_GCI= 0; } m_globalHash->unlock(); } else { @@ -975,6 +993,10 @@ NdbDictionaryImpl::setTransporter(class Ndb* ndb, NdbColumnImpl::create_pseudo("NDB$DISK_REF"); NdbDictionary::Column::RECORDS_IN_RANGE= NdbColumnImpl::create_pseudo("NDB$RECORDS_IN_RANGE"); + NdbDictionary::Column::ROWID= + NdbColumnImpl::create_pseudo("NDB$ROWID"); + NdbDictionary::Column::ROW_GCI= + NdbColumnImpl::create_pseudo("NDB$ROW_GCI"); } m_globalHash->unlock(); return true; @@ -1507,6 +1529,8 @@ NdbDictInterface::parseTableInfo(NdbTableImpl ** ret, (Uint32)NdbDictionary::Object::FragUndefined); impl->m_logging = tableDesc.TableLoggedFlag; + 
impl->m_row_gci = tableDesc.RowGCIFlag; + impl->m_row_checksum = tableDesc.RowChecksumFlag; impl->m_kvalue = tableDesc.TableKValue; impl->m_minLoadFactor = tableDesc.MinLoadFactor; impl->m_maxLoadFactor = tableDesc.MaxLoadFactor; @@ -1841,6 +1865,9 @@ NdbDictInterface::createOrAlterTable(Ndb & ndb, memcpy(tmpTab.FragmentData, impl.m_ng.get_data(), impl.m_ng.length()); tmpTab.TableLoggedFlag = impl.m_logging; + tmpTab.RowGCIFlag = impl.m_row_gci; + tmpTab.RowChecksumFlag = impl.m_row_checksum; + tmpTab.TableLoggedFlag = impl.m_logging; tmpTab.TableKValue = impl.m_kvalue; tmpTab.MinLoadFactor = impl.m_minLoadFactor; tmpTab.MaxLoadFactor = impl.m_maxLoadFactor; @@ -3404,6 +3431,24 @@ NdbTablespaceImpl::NdbTablespaceImpl(NdbDictionary::Tablespace & f) : NdbTablespaceImpl::~NdbTablespaceImpl(){ } +void +NdbTablespaceImpl::assign(const NdbTablespaceImpl& org) +{ + m_id = org.m_id; + m_version = org.m_version; + m_status = org.m_status; + m_type = org.m_type; + + m_name.assign(org.m_name); + m_grow_spec = org.m_grow_spec; + m_extent_size = org.m_extent_size; + m_undo_free_words = org.m_undo_free_words; + m_logfile_group_id = org.m_logfile_group_id; + m_logfile_group_version = org.m_logfile_group_version; + m_logfile_group_name.assign(org.m_logfile_group_name); + m_undo_free_words = org.m_undo_free_words; +} + NdbLogfileGroupImpl::NdbLogfileGroupImpl() : NdbDictionary::LogfileGroup(* this), NdbFilegroupImpl(NdbDictionary::Object::LogfileGroup), m_facade(this) @@ -3419,6 +3464,24 @@ NdbLogfileGroupImpl::NdbLogfileGroupImpl(NdbDictionary::LogfileGroup & f) : NdbLogfileGroupImpl::~NdbLogfileGroupImpl(){ } +void +NdbLogfileGroupImpl::assign(const NdbLogfileGroupImpl& org) +{ + m_id = org.m_id; + m_version = org.m_version; + m_status = org.m_status; + m_type = org.m_type; + + m_name.assign(org.m_name); + m_grow_spec = org.m_grow_spec; + m_extent_size = org.m_extent_size; + m_undo_free_words = org.m_undo_free_words; + m_logfile_group_id = org.m_logfile_group_id; + 
m_logfile_group_version = org.m_logfile_group_version; + m_logfile_group_name.assign(org.m_logfile_group_name); + m_undo_free_words = org.m_undo_free_words; +} + NdbFileImpl::NdbFileImpl(NdbDictionary::Object::Type t) : NdbDictObjectImpl(t) { @@ -3443,6 +3506,22 @@ NdbDatafileImpl::NdbDatafileImpl(NdbDictionary::Datafile & f) : NdbDatafileImpl::~NdbDatafileImpl(){ } +void +NdbDatafileImpl::assign(const NdbDatafileImpl& org) +{ + m_id = org.m_id; + m_version = org.m_version; + m_status = org.m_status; + m_type = org.m_type; + + m_size = org.m_size; + m_free = org.m_free; + m_filegroup_id = org.m_filegroup_id; + m_filegroup_version = org.m_filegroup_version; + m_path.assign(org.m_path); + m_filegroup_name.assign(org.m_filegroup_name); +} + NdbUndofileImpl::NdbUndofileImpl() : NdbDictionary::Undofile(* this), NdbFileImpl(NdbDictionary::Object::Undofile), m_facade(this) @@ -3458,6 +3537,22 @@ NdbUndofileImpl::NdbUndofileImpl(NdbDictionary::Undofile & f) : NdbUndofileImpl::~NdbUndofileImpl(){ } +void +NdbUndofileImpl::assign(const NdbUndofileImpl& org) +{ + m_id = org.m_id; + m_version = org.m_version; + m_status = org.m_status; + m_type = org.m_type; + + m_size = org.m_size; + m_free = org.m_free; + m_filegroup_id = org.m_filegroup_id; + m_filegroup_version = org.m_filegroup_version; + m_path.assign(org.m_path); + m_filegroup_name.assign(org.m_filegroup_name); +} + int NdbDictionaryImpl::createDatafile(const NdbDatafileImpl & file, bool force){ DBUG_ENTER("NdbDictionaryImpl::createDatafile"); @@ -3776,10 +3871,10 @@ int NdbDictInterface::get_filegroup(NdbFilegroupImpl & dst, NdbDictionary::Object::Type type, const char * name){ - DBUG_ENTER("NdbDictInterface::get_filegroup"); + DBUG_ENTER("NdbDictInterface::get_filegroup"); NdbApiSignal tSignal(m_reference); GetTabInfoReq * req = CAST_PTR(GetTabInfoReq, tSignal.getDataPtrSend()); - + size_t strLen = strlen(name) + 1; req->senderRef = m_reference; @@ -3794,7 +3889,7 @@ NdbDictInterface::get_filegroup(NdbFilegroupImpl & 
dst, LinearSectionPtr ptr[1]; ptr[0].p = (Uint32*)name; ptr[0].sz = (strLen + 3)/4; - + int r = dictSignal(&tSignal, ptr, 1, -1, // any node WAIT_GET_TAB_INFO_REQ, @@ -3804,11 +3899,11 @@ NdbDictInterface::get_filegroup(NdbFilegroupImpl & dst, DBUG_PRINT("info", ("get_filegroup failed dictSignal")); DBUG_RETURN(-1); } - - m_error.code = parseFilegroupInfo(dst, - (Uint32*)m_buffer.get_data(), + + m_error.code = parseFilegroupInfo(dst, + (Uint32*)m_buffer.get_data(), m_buffer.length() / 4); - + if(m_error.code) { DBUG_PRINT("info", ("get_filegroup failed parseFilegroupInfo %d", @@ -3816,6 +3911,15 @@ NdbDictInterface::get_filegroup(NdbFilegroupImpl & dst, DBUG_RETURN(m_error.code); } + if(dst.m_type == NdbDictionary::Object::Tablespace) + { + NdbDictionary::LogfileGroup tmp; + get_filegroup(NdbLogfileGroupImpl::getImpl(tmp), + NdbDictionary::Object::LogfileGroup, + dst.m_logfile_group_id); + dst.m_logfile_group_name.assign(tmp.getName()); + } + if(dst.m_type == type) { DBUG_RETURN(0); @@ -3852,10 +3956,59 @@ NdbDictInterface::parseFilegroupInfo(NdbFilegroupImpl &dst, dst.m_undo_buffer_size = fg.LF_UndoBufferSize; dst.m_logfile_group_id = fg.TS_LogfileGroupId; dst.m_logfile_group_version = fg.TS_LogfileGroupVersion; + dst.m_undo_free_words= ((Uint64)fg.LF_UndoFreeWordsHi << 32) + | (fg.LF_UndoFreeWordsLo); + return 0; } int +NdbDictInterface::get_filegroup(NdbFilegroupImpl & dst, + NdbDictionary::Object::Type type, + Uint32 id){ + DBUG_ENTER("NdbDictInterface::get_filegroup"); + NdbApiSignal tSignal(m_reference); + GetTabInfoReq * req = CAST_PTR(GetTabInfoReq, tSignal.getDataPtrSend()); + + req->senderRef = m_reference; + req->senderData = 0; + req->requestType = + GetTabInfoReq::RequestById | GetTabInfoReq::LongSignalConf; + req->tableId = id; + tSignal.theReceiversBlockNumber = DBDICT; + tSignal.theVerId_signalNumber = GSN_GET_TABINFOREQ; + tSignal.theLength = GetTabInfoReq::SignalLength; + + int r = dictSignal(&tSignal, NULL, 1, + -1, // any node + 
WAIT_GET_TAB_INFO_REQ, + WAITFOR_RESPONSE_TIMEOUT, 100); + if (r) + { + DBUG_PRINT("info", ("get_filegroup failed dictSignal")); + DBUG_RETURN(-1); + } + + m_error.code = parseFilegroupInfo(dst, + (Uint32*)m_buffer.get_data(), + m_buffer.length() / 4); + + if(m_error.code) + { + DBUG_PRINT("info", ("get_filegroup failed parseFilegroupInfo %d", + m_error.code)); + DBUG_RETURN(m_error.code); + } + + if(dst.m_type == type) + { + DBUG_RETURN(0); + } + DBUG_PRINT("info", ("get_filegroup failed no such filegroup")); + DBUG_RETURN(m_error.code = GetTabInfoRef::TableNotDefined); +} + +int NdbDictInterface::get_file(NdbFileImpl & dst, NdbDictionary::Object::Type type, int node, @@ -3900,6 +4053,26 @@ NdbDictInterface::get_file(NdbFileImpl & dst, DBUG_RETURN(m_error.code); } + if(dst.m_type == NdbDictionary::Object::Undofile) + { + NdbDictionary::LogfileGroup tmp; + get_filegroup(NdbLogfileGroupImpl::getImpl(tmp), + NdbDictionary::Object::LogfileGroup, + dst.m_filegroup_id); + dst.m_filegroup_name.assign(tmp.getName()); + } + else if(dst.m_type == NdbDictionary::Object::Datafile) + { + NdbDictionary::Tablespace tmp; + get_filegroup(NdbTablespaceImpl::getImpl(tmp), + NdbDictionary::Object::Tablespace, + dst.m_filegroup_id); + dst.m_filegroup_name.assign(tmp.getName()); + dst.m_free *= tmp.getExtentSize(); + } + else + dst.m_filegroup_name.assign("Not Yet Implemented"); + if(dst.m_type == type) { DBUG_RETURN(0); @@ -3929,12 +4102,11 @@ NdbDictInterface::parseFileInfo(NdbFileImpl &dst, dst.m_id= f.FileNo; dst.m_size= ((Uint64)f.FileSizeHi << 32) | (f.FileSizeLo); - dst.m_free= f.FileFreeExtents; dst.m_path.assign(f.FileName); - //dst.m_filegroup_name + dst.m_filegroup_id= f.FilegroupId; dst.m_filegroup_version= f.FilegroupVersion; - + dst.m_free= f.FileFreeExtents; return 0; } @@ -3945,3 +4117,13 @@ template class Vector<Vector<Uint32> >; template class Vector<NdbTableImpl*>; template class Vector<NdbColumnImpl*>; +const NdbDictionary::Column * NdbDictionary::Column::FRAGMENT = 
0; +const NdbDictionary::Column * NdbDictionary::Column::FRAGMENT_MEMORY = 0; +const NdbDictionary::Column * NdbDictionary::Column::ROW_COUNT = 0; +const NdbDictionary::Column * NdbDictionary::Column::COMMIT_COUNT = 0; +const NdbDictionary::Column * NdbDictionary::Column::ROW_SIZE = 0; +const NdbDictionary::Column * NdbDictionary::Column::RANGE_NO = 0; +const NdbDictionary::Column * NdbDictionary::Column::DISK_REF = 0; +const NdbDictionary::Column * NdbDictionary::Column::RECORDS_IN_RANGE = 0; +const NdbDictionary::Column * NdbDictionary::Column::ROWID = 0; +const NdbDictionary::Column * NdbDictionary::Column::ROW_GCI = 0; diff --git a/storage/ndb/src/ndbapi/NdbDictionaryImpl.hpp b/storage/ndb/src/ndbapi/NdbDictionaryImpl.hpp index 0e03fe80ecd..cd6b9199250 100644 --- a/storage/ndb/src/ndbapi/NdbDictionaryImpl.hpp +++ b/storage/ndb/src/ndbapi/NdbDictionaryImpl.hpp @@ -154,6 +154,8 @@ public: Vector<Uint16> m_fragments; bool m_logging; + bool m_row_gci; + bool m_row_checksum; int m_kvalue; int m_minLoadFactor; int m_maxLoadFactor; @@ -292,6 +294,7 @@ struct NdbFilegroupImpl : public NdbDictObjectImpl { BaseString m_logfile_group_name; Uint32 m_logfile_group_id; Uint32 m_logfile_group_version; + Uint64 m_undo_free_words; }; class NdbTablespaceImpl : public NdbDictionary::Tablespace, @@ -301,6 +304,8 @@ public: NdbTablespaceImpl(NdbDictionary::Tablespace &); ~NdbTablespaceImpl(); + void assign(const NdbTablespaceImpl&); + static NdbTablespaceImpl & getImpl(NdbDictionary::Tablespace & t); static const NdbTablespaceImpl & getImpl(const NdbDictionary::Tablespace &); NdbDictionary::Tablespace * m_facade; @@ -313,6 +318,8 @@ public: NdbLogfileGroupImpl(NdbDictionary::LogfileGroup &); ~NdbLogfileGroupImpl(); + void assign(const NdbLogfileGroupImpl&); + static NdbLogfileGroupImpl & getImpl(NdbDictionary::LogfileGroup & t); static const NdbLogfileGroupImpl& getImpl(const NdbDictionary::LogfileGroup&); @@ -336,6 +343,8 @@ public: NdbDatafileImpl(NdbDictionary::Datafile &); 
~NdbDatafileImpl(); + void assign(const NdbDatafileImpl&); + static NdbDatafileImpl & getImpl(NdbDictionary::Datafile & t); static const NdbDatafileImpl & getImpl(const NdbDictionary::Datafile & t); NdbDictionary::Datafile * m_facade; @@ -347,6 +356,8 @@ public: NdbUndofileImpl(NdbDictionary::Undofile &); ~NdbUndofileImpl(); + void assign(const NdbUndofileImpl&); + static NdbUndofileImpl & getImpl(NdbDictionary::Undofile & t); static const NdbUndofileImpl & getImpl(const NdbDictionary::Undofile & t); NdbDictionary::Undofile * m_facade; @@ -402,10 +413,10 @@ public: const Uint32 * data, Uint32 len, bool fullyQualifiedNames); - static int parseFileInfo(NdbFileImpl &dst, + static int parseFileInfo(NdbFileImpl &dst, const Uint32 * data, Uint32 len); - - static int parseFilegroupInfo(NdbFilegroupImpl &dst, + + static int parseFilegroupInfo(NdbFilegroupImpl &dst, const Uint32 * data, Uint32 len); int create_file(const NdbFileImpl &, const NdbFilegroupImpl&, bool overwrite = false); @@ -413,7 +424,7 @@ public: int create_filegroup(const NdbFilegroupImpl &); int drop_filegroup(const NdbFilegroupImpl &); - int get_filegroup(NdbFilegroupImpl&, NdbDictionary::Object::Type, int); + int get_filegroup(NdbFilegroupImpl&, NdbDictionary::Object::Type, Uint32); int get_filegroup(NdbFilegroupImpl&,NdbDictionary::Object::Type,const char*); int get_file(NdbFileImpl&, NdbDictionary::Object::Type, int, int); int get_file(NdbFileImpl&, NdbDictionary::Object::Type, int, const char *); diff --git a/storage/ndb/src/ndbapi/NdbScanOperation.cpp b/storage/ndb/src/ndbapi/NdbScanOperation.cpp index 2fcbeec1bf0..577eef78816 100644 --- a/storage/ndb/src/ndbapi/NdbScanOperation.cpp +++ b/storage/ndb/src/ndbapi/NdbScanOperation.cpp @@ -161,6 +161,16 @@ NdbScanOperation::readTuples(NdbScanOperation::LockMode lm, } m_keyInfo = lockExcl ? 
1 : 0; + bool tupScan = (scan_flags & SF_TupScan); + +#if 1 // XXX temp for testing + { char* p = getenv("NDB_USE_TUPSCAN"); + if (p != 0) { + unsigned n = atoi(p); // 0-10 + if (::time(0) % 10 < n) tupScan = true; + } + } +#endif bool rangeScan = false; if (m_accessTable->m_indexType == NdbDictionary::Index::OrderedIndex) @@ -176,11 +186,8 @@ NdbScanOperation::readTuples(NdbScanOperation::LockMode lm, theStatus = GetValue; theOperationType = OpenRangeScanRequest; rangeScan = true; - } - - bool tupScan = (scan_flags & SF_TupScan); - if (tupScan && rangeScan) tupScan = false; + } theParallelism = parallel; diff --git a/storage/ndb/src/ndbapi/ndb_cluster_connection.cpp b/storage/ndb/src/ndbapi/ndb_cluster_connection.cpp index 3a69ef10afa..405141cd143 100644 --- a/storage/ndb/src/ndbapi/ndb_cluster_connection.cpp +++ b/storage/ndb/src/ndbapi/ndb_cluster_connection.cpp @@ -556,5 +556,17 @@ void Ndb_cluster_connection_impl::connect_thread() DBUG_VOID_RETURN; } +void +Ndb_cluster_connection::init_get_next_node(Ndb_cluster_connection_node_iter &iter) +{ + m_impl.init_get_next_node(iter); +} + +Uint32 +Ndb_cluster_connection::get_next_node(Ndb_cluster_connection_node_iter &iter) +{ + return m_impl.get_next_node(iter); +} + template class Vector<Ndb_cluster_connection_impl::Node>; diff --git a/storage/ndb/src/ndbapi/ndb_cluster_connection_impl.hpp b/storage/ndb/src/ndbapi/ndb_cluster_connection_impl.hpp index df30189d713..561babd62c1 100644 --- a/storage/ndb/src/ndbapi/ndb_cluster_connection_impl.hpp +++ b/storage/ndb/src/ndbapi/ndb_cluster_connection_impl.hpp @@ -26,15 +26,6 @@ class ConfigRetriever; class NdbThread; class ndb_mgm_configuration; -struct Ndb_cluster_connection_node_iter { - Ndb_cluster_connection_node_iter() : scan_state(~0), - init_pos(0), - cur_pos(0) {}; - Uint8 scan_state; - Uint8 init_pos; - Uint8 cur_pos; -}; - extern "C" { void* run_ndb_cluster_connection_connect_thread(void*); } diff --git a/storage/ndb/src/ndbapi/ndberror.c 
b/storage/ndb/src/ndbapi/ndberror.c index 73f08e8ac77..8103d1d179e 100644 --- a/storage/ndb/src/ndbapi/ndberror.c +++ b/storage/ndb/src/ndbapi/ndberror.c @@ -172,6 +172,7 @@ ErrorBundle ErrorCodes[] = { { 805, DMEC, TR, "Out of attrinfo records in tuple manager" }, { 830, DMEC, TR, "Out of add fragment operation records" }, { 873, DMEC, TR, "Out of attrinfo records for scan in tuple manager" }, + { 899, DMEC, TR, "Rowid already allocated" }, { 1217, DMEC, TR, "Out of operation records in local data manager (increase MaxNoOfLocalOperations)" }, { 1220, DMEC, TR, "REDO log files overloaded, consult online manual (decrease TimeBetweenLocalCheckpoints, and|or increase NoOfFragmentLogFiles)" }, { 1222, DMEC, TR, "Out of transaction markers in LQH" }, @@ -392,6 +393,20 @@ ErrorBundle ErrorCodes[] = { { 1231, DMEC, SE, "Invalid table or index to scan" }, { 1232, DMEC, SE, "Invalid table or index to scan" }, + { 1502, DMEC, IE, "Filegroup already exists" }, + { 1503, DMEC, SE, "Out of filegroup records" }, + { 1504, DMEC, SE, "Out of logbuffer memory" }, + { 1505, DMEC, IE, "Invalid filegroup" }, + { 1506, DMEC, IE, "Invalid filegroup version" }, + { 1507, DMEC, IE, "File no already inuse" }, + { 1508, DMEC, SE, "Out of file records" }, + { 1509, DMEC, SE, "File system error, check if path,permissions etc" }, + { 1510, DMEC, IE, "File meta data error" }, + { 1511, DMEC, IE, "Out of memory" }, + { 1512, DMEC, SE, "File read error" }, + { 1513, DMEC, IE, "Filegroup not online" }, + + /** * FunctionNotImplemented */ diff --git a/storage/ndb/test/include/NdbRestarter.hpp b/storage/ndb/test/include/NdbRestarter.hpp index 85c3f92f9ee..e9bf96148d1 100644 --- a/storage/ndb/test/include/NdbRestarter.hpp +++ b/storage/ndb/test/include/NdbRestarter.hpp @@ -82,8 +82,7 @@ protected: int connect(); void disconnect(); int getStatus(); - - Vector<ndb_mgm_node_state> ndbNodes; + Vector<ndb_mgm_node_state> mgmNodes; Vector<ndb_mgm_node_state> apiNodes; @@ -92,6 +91,9 @@ protected: 
ndb_mgm_configuration * m_config; protected: ndb_mgm_configuration * getConfig(); + +public: + Vector<ndb_mgm_node_state> ndbNodes; }; #endif diff --git a/storage/ndb/test/ndbapi/bank/Bank.cpp b/storage/ndb/test/ndbapi/bank/Bank.cpp index fc23ebb59ce..294ed80edcf 100644 --- a/storage/ndb/test/ndbapi/bank/Bank.cpp +++ b/storage/ndb/test/ndbapi/bank/Bank.cpp @@ -108,6 +108,7 @@ retry_transaction: break; case NDBT_TEMPORARY: g_err << "TEMPORARY_ERRROR retrying" << endl; + NdbSleep_MilliSleep(50); goto retry_transaction; break; default: @@ -1401,84 +1402,103 @@ int Bank::getOldestPurgedGL(const Uint32 accountType, /** * SELECT MAX(time) FROM GL WHERE account_type = @accountType and purged=1 */ - NdbConnection* pScanTrans = m_ndb.startTransaction(); - if (pScanTrans == NULL) { - ERR(m_ndb.getNdbError()); - return NDBT_FAILED; - } - - NdbScanOperation* pOp = pScanTrans->getNdbScanOperation("GL"); - if (pOp == NULL) { - ERR(pScanTrans->getNdbError()); - m_ndb.closeTransaction(pScanTrans); - return NDBT_FAILED; - } - - if( pOp->readTuples() ) { - ERR(pScanTrans->getNdbError()); - m_ndb.closeTransaction(pScanTrans); - return NDBT_FAILED; - } - - check = pOp->interpret_exit_ok(); - if( check == -1 ) { - ERR(pScanTrans->getNdbError()); - m_ndb.closeTransaction(pScanTrans); - return NDBT_FAILED; - } - - NdbRecAttr* accountTypeRec = pOp->getValue("ACCOUNT_TYPE"); - if( accountTypeRec ==NULL ) { - ERR(pScanTrans->getNdbError()); - m_ndb.closeTransaction(pScanTrans); - return NDBT_FAILED; - } - - NdbRecAttr* timeRec = pOp->getValue("TIME"); - if( timeRec ==NULL ) { - ERR(pScanTrans->getNdbError()); - m_ndb.closeTransaction(pScanTrans); - return NDBT_FAILED; - } - - NdbRecAttr* purgedRec = pOp->getValue("PURGED"); - if( purgedRec ==NULL ) { - ERR(pScanTrans->getNdbError()); - m_ndb.closeTransaction(pScanTrans); - return NDBT_FAILED; - } - - check = pScanTrans->execute(NoCommit); - if( check == -1 ) { - ERR(pScanTrans->getNdbError()); - m_ndb.closeTransaction(pScanTrans); - return 
NDBT_FAILED; - } + NdbConnection* pScanTrans = 0; + do + { + pScanTrans = m_ndb.startTransaction(); + if (pScanTrans == NULL) { + ERR(m_ndb.getNdbError()); + return NDBT_FAILED; + } - int eof; - int rows = 0; - eof = pOp->nextResult(); - oldest = 0; + NdbScanOperation* pOp = pScanTrans->getNdbScanOperation("GL"); + if (pOp == NULL) { + ERR(pScanTrans->getNdbError()); + m_ndb.closeTransaction(pScanTrans); + return NDBT_FAILED; + } - while(eof == 0){ - rows++; - Uint32 a = accountTypeRec->u_32_value(); - Uint32 p = purgedRec->u_32_value(); - - if (a == accountType && p == 1){ - // One record found - Uint64 t = timeRec->u_64_value(); - if (t > oldest) - oldest = t; + if( pOp->readTuples() ) { + ERR(pScanTrans->getNdbError()); + m_ndb.closeTransaction(pScanTrans); + return NDBT_FAILED; } + + check = pOp->interpret_exit_ok(); + if( check == -1 ) { + ERR(pScanTrans->getNdbError()); + m_ndb.closeTransaction(pScanTrans); + return NDBT_FAILED; + } + + NdbRecAttr* accountTypeRec = pOp->getValue("ACCOUNT_TYPE"); + if( accountTypeRec ==NULL ) { + ERR(pScanTrans->getNdbError()); + m_ndb.closeTransaction(pScanTrans); + return NDBT_FAILED; + } + + NdbRecAttr* timeRec = pOp->getValue("TIME"); + if( timeRec ==NULL ) { + ERR(pScanTrans->getNdbError()); + m_ndb.closeTransaction(pScanTrans); + return NDBT_FAILED; + } + + NdbRecAttr* purgedRec = pOp->getValue("PURGED"); + if( purgedRec ==NULL ) { + ERR(pScanTrans->getNdbError()); + m_ndb.closeTransaction(pScanTrans); + return NDBT_FAILED; + } + + check = pScanTrans->execute(NoCommit); + if( check == -1 ) { + NdbError err = pScanTrans->getNdbError(); + ERR(err); + m_ndb.closeTransaction(pScanTrans); + if (err.status == NdbError::TemporaryError) + { + NdbSleep_MilliSleep(50); + continue; + } + return NDBT_FAILED; + } + + int eof; + int rows = 0; eof = pOp->nextResult(); - } - if (eof == -1) { - ERR(pScanTrans->getNdbError()); - m_ndb.closeTransaction(pScanTrans); - return NDBT_FAILED; - } + oldest = 0; + while(eof == 0){ + rows++; + 
Uint32 a = accountTypeRec->u_32_value(); + Uint32 p = purgedRec->u_32_value(); + + if (a == accountType && p == 1){ + // One record found + Uint64 t = timeRec->u_64_value(); + if (t > oldest) + oldest = t; + } + eof = pOp->nextResult(); + } + if (eof == -1) + { + NdbError err = pScanTrans->getNdbError(); + ERR(err); + m_ndb.closeTransaction(pScanTrans); + + if (err.status == NdbError::TemporaryError) + { + NdbSleep_MilliSleep(50); + continue; + } + return NDBT_FAILED; + } + break; + } while(true); + m_ndb.closeTransaction(pScanTrans); return NDBT_OK; @@ -1587,88 +1607,111 @@ int Bank::checkNoTransactionsOlderThan(const Uint32 accountType, * */ - int check; - NdbConnection* pScanTrans = m_ndb.startTransaction(); - if (pScanTrans == NULL) { - ERR(m_ndb.getNdbError()); - return NDBT_FAILED; - } - - NdbScanOperation* pOp = pScanTrans->getNdbScanOperation("TRANSACTION"); - if (pOp == NULL) { - ERR(pScanTrans->getNdbError()); - m_ndb.closeTransaction(pScanTrans); - return NDBT_FAILED; - } - - if( pOp->readTuples() ) { - ERR(pScanTrans->getNdbError()); - m_ndb.closeTransaction(pScanTrans); - return NDBT_FAILED; - } - - check = pOp->interpret_exit_ok(); - if( check == -1 ) { - ERR(pScanTrans->getNdbError()); - m_ndb.closeTransaction(pScanTrans); - return NDBT_FAILED; - } - - NdbRecAttr* accountTypeRec = pOp->getValue("ACCOUNT_TYPE"); - if( accountTypeRec ==NULL ) { - ERR(pScanTrans->getNdbError()); - m_ndb.closeTransaction(pScanTrans); - return NDBT_FAILED; - } - - NdbRecAttr* timeRec = pOp->getValue("TIME"); - if( timeRec ==NULL ) { - ERR(pScanTrans->getNdbError()); - m_ndb.closeTransaction(pScanTrans); - return NDBT_FAILED; - } - - NdbRecAttr* transactionIdRec = pOp->getValue("TRANSACTION_ID"); - if( transactionIdRec ==NULL ) { - ERR(pScanTrans->getNdbError()); - m_ndb.closeTransaction(pScanTrans); - return NDBT_FAILED; - } - - check = pScanTrans->execute(NoCommit); - if( check == -1 ) { - ERR(pScanTrans->getNdbError()); - m_ndb.closeTransaction(pScanTrans); - return 
NDBT_FAILED; - } - - int eof; - int rows = 0; + int loop = 0; int found = 0; - eof = pOp->nextResult(); + NdbConnection* pScanTrans = 0; + do { + int check; + loop++; + pScanTrans = m_ndb.startTransaction(); + if (pScanTrans == NULL) { + ERR(m_ndb.getNdbError()); + return NDBT_FAILED; + } - while(eof == 0){ - rows++; - Uint32 a = accountTypeRec->u_32_value(); - Uint32 t = timeRec->u_32_value(); + NdbScanOperation* pOp = pScanTrans->getNdbScanOperation("TRANSACTION"); + if (pOp == NULL) { + ERR(pScanTrans->getNdbError()); + m_ndb.closeTransaction(pScanTrans); + return NDBT_FAILED; + } + + if( pOp->readTuples() ) { + ERR(pScanTrans->getNdbError()); + m_ndb.closeTransaction(pScanTrans); + return NDBT_FAILED; + } + + check = pOp->interpret_exit_ok(); + if( check == -1 ) { + ERR(pScanTrans->getNdbError()); + m_ndb.closeTransaction(pScanTrans); + return NDBT_FAILED; + } + + NdbRecAttr* accountTypeRec = pOp->getValue("ACCOUNT_TYPE"); + if( accountTypeRec ==NULL ) { + ERR(pScanTrans->getNdbError()); + m_ndb.closeTransaction(pScanTrans); + return NDBT_FAILED; + } + + NdbRecAttr* timeRec = pOp->getValue("TIME"); + if( timeRec ==NULL ) { + ERR(pScanTrans->getNdbError()); + m_ndb.closeTransaction(pScanTrans); + return NDBT_FAILED; + } + + NdbRecAttr* transactionIdRec = pOp->getValue("TRANSACTION_ID"); + if( transactionIdRec ==NULL ) { + ERR(pScanTrans->getNdbError()); + m_ndb.closeTransaction(pScanTrans); + return NDBT_FAILED; + } + + check = pScanTrans->execute(NoCommit); + if( check == -1 ) { + NdbError err = pScanTrans->getNdbError(); + ERR(err); + m_ndb.closeTransaction(pScanTrans); - if (a == accountType && t <= oldest){ - // One record found - Uint64 ti = transactionIdRec->u_64_value(); - g_err << "checkNoTransactionsOlderThan found one record" << endl - << " t = " << t << endl - << " a = " << a << endl - << " ti = " << ti << endl; - found++; + if (err.status == NdbError::TemporaryError) + { + NdbSleep_MilliSleep(50); + continue; + } + return NDBT_FAILED; } + + int eof; 
+ int rows = 0; + found = 0; eof = pOp->nextResult(); - } - if (eof == -1) { - ERR(pScanTrans->getNdbError()); - m_ndb.closeTransaction(pScanTrans); - return NDBT_FAILED; - } + while(eof == 0){ + rows++; + Uint32 a = accountTypeRec->u_32_value(); + Uint32 t = timeRec->u_32_value(); + + if (a == accountType && t <= oldest){ + // One record found + Uint64 ti = transactionIdRec->u_64_value(); + g_err << "checkNoTransactionsOlderThan found one record" << endl + << " t = " << t << endl + << " a = " << a << endl + << " ti = " << ti << endl; + found++; + } + eof = pOp->nextResult(); + } + if (eof == -1) { + NdbError err = pScanTrans->getNdbError(); + ERR(err); + m_ndb.closeTransaction(pScanTrans); + + if (err.status == NdbError::TemporaryError) + { + NdbSleep_MilliSleep(50); + continue; + } + + return NDBT_FAILED; + } + + break; + } while(true); + m_ndb.closeTransaction(pScanTrans); if (found == 0) @@ -1949,32 +1992,43 @@ int Bank::performIncreaseTime(int maxSleepBetweenDays, int yield) int Bank::readSystemValue(SystemValueId sysValId, Uint64 & value){ int check; + NdbConnection* pTrans = 0; + while (true) + { + pTrans = m_ndb.startTransaction(); + if (pTrans == NULL) + { + ERR(m_ndb.getNdbError()); + if(m_ndb.getNdbError().status == NdbError::TemporaryError) + { + NdbSleep_MilliSleep(50); + continue; + } + return NDBT_FAILED; + } - NdbConnection* pTrans = m_ndb.startTransaction(); - if (pTrans == NULL){ - ERR(m_ndb.getNdbError()); - if(m_ndb.getNdbError().status == NdbError::TemporaryError) - return NDBT_TEMPORARY; - return NDBT_FAILED; - } - - int result; - if ((result= prepareReadSystemValueOp(pTrans, sysValId, value)) != NDBT_OK) { - ERR(pTrans->getNdbError()); - m_ndb.closeTransaction(pTrans); - return result; - } - - check = pTrans->execute(Commit); - if( check == -1 ) { - ERR(pTrans->getNdbError()); - if(pTrans->getNdbError().status == NdbError::TemporaryError) + int result; + if ((result= prepareReadSystemValueOp(pTrans, sysValId, value)) != NDBT_OK) { + 
ERR(pTrans->getNdbError()); m_ndb.closeTransaction(pTrans); - return NDBT_TEMPORARY; + return result; } - m_ndb.closeTransaction(pTrans); - return NDBT_FAILED; + + check = pTrans->execute(Commit); + if( check == -1 ) { + NdbError err = pTrans->getNdbError(); + m_ndb.closeTransaction(pTrans); + ERR(err); + if(err.status == NdbError::TemporaryError) + { + NdbSleep_MilliSleep(50); + continue; + } + return NDBT_FAILED; + } + + break; } m_ndb.closeTransaction(pTrans); diff --git a/storage/ndb/test/ndbapi/testBasic.cpp b/storage/ndb/test/ndbapi/testBasic.cpp index 4d64b15ecfa..879a4979220 100644 --- a/storage/ndb/test/ndbapi/testBasic.cpp +++ b/storage/ndb/test/ndbapi/testBasic.cpp @@ -1034,6 +1034,100 @@ runMassiveRollback2(NDBT_Context* ctx, NDBT_Step* step){ return result; } +/** + * TUP errors + */ +struct TupError +{ + enum Bits { + TE_VARSIZE = 0x1, + TE_MULTI_OP = 0x2, + TE_DISK = 0x4, + TE_REPLICA = 0x8 + }; + int op; + int error; + int bits; +}; + +static +TupError +f_tup_errors[] = +{ + { NdbOperation::InsertRequest, 4014, 0 }, // Out of undo buffer + { NdbOperation::InsertRequest, 4015, TupError::TE_DISK }, // Out of log space + { NdbOperation::InsertRequest, 4016, 0 }, // AI Inconsistency + { NdbOperation::InsertRequest, 4017, 0 }, // Out of memory + { NdbOperation::InsertRequest, 4018, 0 }, // Null check error + { NdbOperation::InsertRequest, 4019, TupError::TE_REPLICA }, //Alloc rowid error + { NdbOperation::InsertRequest, 4020, TupError::TE_MULTI_OP }, // Size change error + { NdbOperation::InsertRequest, 4021, TupError::TE_DISK }, // Out of disk space + { -1, 0, 0 } +}; + +int +runTupErrors(NDBT_Context* ctx, NDBT_Step* step){ + + NdbRestarter restarter; + HugoTransactions hugoTrans(*ctx->getTab()); + HugoOperations hugoOps(*ctx->getTab()); + Ndb* pNdb = GETNDB(step); + + const NdbDictionary::Table * tab = ctx->getTab(); + Uint32 i; + int bits = TupError::TE_MULTI_OP; + for(i = 0; i<tab->getNoOfColumns(); i++) + { + if (tab->getColumn(i)->getArrayType() 
!= NdbDictionary::Column::ArrayTypeFixed) + bits |= TupError::TE_VARSIZE; + if (tab->getColumn(i)->getStorageType()!= NdbDictionary::Column::StorageTypeMemory) + bits |= TupError::TE_DISK; + } + + if (restarter.getNumDbNodes() >= 2) + { + bits |= TupError::TE_REPLICA; + } + + /** + * Insert + */ + for(i = 0; f_tup_errors[i].op != -1; i++) + { + if (f_tup_errors[i].op != NdbOperation::InsertRequest) + { + g_info << "Skipping " << f_tup_errors[i].error + << " - not insert" << endl; + continue; + } + + if ((f_tup_errors[i].bits & bits) != f_tup_errors[i].bits) + { + g_info << "Skipping " << f_tup_errors[i].error + << " - req bits: " << hex << f_tup_errors[i].bits + << " bits: " << hex << bits << endl; + continue; + } + + g_info << "Testing error insert: " << f_tup_errors[i].error << endl; + restarter.insertErrorInAllNodes(f_tup_errors[i].error); + if (f_tup_errors[i].bits & TupError::TE_MULTI_OP) + { + + } + else + { + hugoTrans.loadTable(pNdb, 5); + } + restarter.insertErrorInAllNodes(0); + if (hugoTrans.clearTable(pNdb, 5) != 0) + { + return NDBT_FAILED; + } + } + + return NDBT_OK; +} NDBT_TESTSUITE(testBasic); TESTCASE("PkInsert", @@ -1277,6 +1371,10 @@ TESTCASE("Fill", INITIALIZER(runPkRead); FINALIZER(runClearTable2); } +TESTCASE("TupError", + "Verify what happens when we fill the db" ){ + INITIALIZER(runTupErrors); +} NDBT_TESTSUITE_END(testBasic); #if 0 diff --git a/storage/ndb/test/ndbapi/testNdbApi.cpp b/storage/ndb/test/ndbapi/testNdbApi.cpp index dd0628bb1b1..cfedfbd9ac0 100644 --- a/storage/ndb/test/ndbapi/testNdbApi.cpp +++ b/storage/ndb/test/ndbapi/testNdbApi.cpp @@ -1150,7 +1150,7 @@ int runBug_WritePartialIgnoreError(NDBT_Context* ctx, NDBT_Step* step){ Ndb* pNdb = GETNDB(step); C2(hugoOps.startTransaction(pNdb) == 0); C2(hugoOps.pkWritePartialRecord(pNdb, 0, 1) == 0); - C2(hugoOps.execute_Commit(pNdb, AO_IgnoreError) == 0); + C2(hugoOps.execute_Commit(pNdb, AO_IgnoreError) == 839); C2(hugoOps.closeTransaction(pNdb) == 0); return result; diff --git 
a/storage/ndb/test/ndbapi/testOperations.cpp b/storage/ndb/test/ndbapi/testOperations.cpp index 505b1620900..65b406f155d 100644 --- a/storage/ndb/test/ndbapi/testOperations.cpp +++ b/storage/ndb/test/ndbapi/testOperations.cpp @@ -18,6 +18,7 @@ #include "NDBT_ReturnCodes.h" #include "HugoTransactions.hpp" #include "UtilTransactions.hpp" +#include <NdbRestarter.hpp> struct OperationTestCase { const char * name; @@ -234,6 +235,11 @@ runClearTable(NDBT_Context* ctx, NDBT_Step* step){ if (utilTrans.clearTable2(GETNDB(step), records, 240) != 0){ return NDBT_FAILED; } + + NdbRestarter r; + int lcp = 7099; + r.dumpStateAllNodes(&lcp, 1); + return NDBT_OK; } diff --git a/storage/ndb/test/ndbapi/testSRBank.cpp b/storage/ndb/test/ndbapi/testSRBank.cpp index 6d57724f4c6..df7bfa3a7f9 100644 --- a/storage/ndb/test/ndbapi/testSRBank.cpp +++ b/storage/ndb/test/ndbapi/testSRBank.cpp @@ -122,36 +122,30 @@ int runBankSum(NDBT_Context* ctx, NDBT_Step* step){ result = NDBT_FAILED; \ continue; } -int runSR(NDBT_Context* ctx, NDBT_Step* step) +int +restart_cluster(NDBT_Context* ctx, NDBT_Step* step, NdbRestarter& restarter) { - int result = NDBT_OK; - int runtime = ctx->getNumLoops(); - int sleeptime = ctx->getNumRecords(); - NdbRestarter restarter; bool abort = true; int timeout = 180; + int result = NDBT_OK; - Uint32 now; - const Uint32 stop = time(0)+ runtime; - while(!ctx->isTestStopped() && ((now= time(0)) < stop) && result == NDBT_OK) + do { - ndbout << " -- Sleep " << sleeptime << "s " << endl; - NdbSleep_SecSleep(sleeptime); ndbout << " -- Shutting down " << endl; ctx->setProperty("SR", 1); CHECK(restarter.restartAll(false, true, abort) == 0); ctx->setProperty("SR", 2); CHECK(restarter.waitClusterNoStart(timeout) == 0); - + Uint32 cnt = ctx->getProperty("ThreadCount"); Uint32 curr= ctx->getProperty("ThreadStopped"); - while(curr != cnt) + while(curr != cnt && !ctx->isTestStopped()) { ndbout_c("%d %d", curr, cnt); NdbSleep_MilliSleep(100); curr= ctx->getProperty("ThreadStopped"); 
} - + ctx->setProperty("ThreadStopped", (Uint32)0); CHECK(restarter.startAll() == 0); CHECK(restarter.waitClusterStarted(timeout) == 0); @@ -166,18 +160,166 @@ int runSR(NDBT_Context* ctx, NDBT_Step* step) ndbout << "bank.performSumAccounts FAILED" << endl; return NDBT_FAILED; } - + if (bank.performValidateAllGLs() != 0) { ndbout << "bank.performValidateAllGLs FAILED" << endl; return NDBT_FAILED; } } - + ndbout << " -- Validating complete " << endl; - ctx->setProperty("SR", (Uint32)0); - ctx->broadcast(); + } while(0); + ctx->setProperty("SR", (Uint32)0); + ctx->broadcast(); + return result; +} + +ndb_mgm_node_state* +select_node_to_stop(Vector<ndb_mgm_node_state>& nodes) +{ + Uint32 i, j; + Vector<ndb_mgm_node_state*> alive_nodes; + for(i = 0; i<nodes.size(); i++) + { + ndb_mgm_node_state* node = &nodes[i]; + if (node->node_status == NDB_MGM_NODE_STATUS_STARTED) + alive_nodes.push_back(node); + } + + Vector<ndb_mgm_node_state*> victims; + // Remove those with one in node group + for(i = 0; i<alive_nodes.size(); i++) + { + int group = alive_nodes[i]->node_group; + for(j = 0; j<alive_nodes.size(); j++) + { + if (i != j && alive_nodes[j]->node_group == group) + { + victims.push_back(alive_nodes[i]); + break; + } + } + } + + if (victims.size()) + { + int victim = rand() % victims.size(); + return victims[victim]; + } + else + { + return 0; + } +} + +ndb_mgm_node_state* +select_node_to_start(Vector<ndb_mgm_node_state>& nodes) +{ + Uint32 i, j; + Vector<ndb_mgm_node_state*> victims; + for(i = 0; i<nodes.size(); i++) + { + ndb_mgm_node_state* node = &nodes[i]; + if (node->node_status == NDB_MGM_NODE_STATUS_NOT_STARTED) + victims.push_back(node); + } + + if (victims.size()) + { + int victim = rand() % victims.size(); + return victims[victim]; + } + else + { + return 0; } +} + +enum Action { + AA_RestartCluster = 0x1, + AA_RestartNode = 0x2, + AA_StopNode = 0x4, + AA_StartNode = 0x8, + AA_COUNT = 4 +}; + +int +runMixRestart(NDBT_Context* ctx, NDBT_Step* step) +{ + int 
result = NDBT_OK; + int runtime = ctx->getNumLoops(); + int sleeptime = ctx->getNumRecords(); + NdbRestarter restarter; + int timeout = 180; + Uint32 type = ctx->getProperty("Type", ~(Uint32)0); + + restarter.waitClusterStarted(); + Vector<ndb_mgm_node_state> nodes; + nodes = restarter.ndbNodes; +#if 0 + for (Uint32 i = 0; i<restarter.ndbNodes.size(); i++) + nodes.push_back(restarter.ndbNodes[i]); +#endif + + + Uint32 now; + const Uint32 stop = time(0)+ runtime; + while(!ctx->isTestStopped() && ((now= time(0)) < stop) && result == NDBT_OK) + { + ndbout << " -- Sleep " << sleeptime << "s " << endl; + int cnt = sleeptime; + while (cnt-- && !ctx->isTestStopped()) + NdbSleep_SecSleep(1); + if (ctx->isTestStopped()) + return NDBT_FAILED; + + ndb_mgm_node_state* node = 0; + int action; +loop: + while(((action = (1 << (rand() % AA_COUNT))) & type) == 0); + switch(action){ + case AA_RestartCluster: + if (restart_cluster(ctx, step, restarter)) + return NDBT_FAILED; + for (Uint32 i = 0; i<nodes.size(); i++) + nodes[i].node_status = NDB_MGM_NODE_STATUS_STARTED; + break; + case AA_RestartNode: + case AA_StopNode: + { + if ((node = select_node_to_stop(nodes)) == 0) + goto loop; + + if (action == AA_RestartNode) + { + g_err << "Restarting " << node->node_id << endl; + if (restarter.restartOneDbNode(node->node_id, false, false, true)) + return NDBT_FAILED; + } + if (action == AA_StopNode) + { + g_err << "Stopping " << node->node_id << endl; + if (restarter.restartOneDbNode(node->node_id, false, true, true)) + return NDBT_FAILED; + node->node_status = NDB_MGM_NODE_STATUS_NOT_STARTED; + } + break; + } + case AA_StartNode: + if ((node = select_node_to_start(nodes)) == 0) + goto loop; + g_err << "Starting " << node->node_id << endl; + if (restarter.startNodes(&node->node_id, 1)) + return NDBT_FAILED; + if (restarter.waitNodesStarted(&node->node_id, 1)) + return NDBT_FAILED; + + node->node_status = NDB_MGM_NODE_STATUS_STARTED; + break; + } + } + ctx->stopTest(); return NDBT_OK; } @@ 
-191,13 +333,14 @@ int runDropBank(NDBT_Context* ctx, NDBT_Step* step){ NDBT_TESTSUITE(testSRBank); -TESTCASE("Graceful", +TESTCASE("SR", " Test that a consistent bank is restored after graceful shutdown\n" "1. Create bank\n" "2. Start bank and let it run\n" "3. Restart ndb and verify consistency\n" "4. Drop bank\n") { + TC_PROPERTY("Type", AA_RestartCluster); INITIALIZER(runCreateBank); STEP(runBankTimer); STEP(runBankTransactions); @@ -211,15 +354,16 @@ TESTCASE("Graceful", STEP(runBankTransactions); STEP(runBankTransactions); STEP(runBankGL); - STEP(runSR); + STEP(runMixRestart); } -TESTCASE("Abort", +TESTCASE("NR", " Test that a consistent bank is restored after graceful shutdown\n" "1. Create bank\n" "2. Start bank and let it run\n" "3. Restart ndb and verify consistency\n" "4. Drop bank\n") { + TC_PROPERTY("Type", AA_RestartNode | AA_StopNode | AA_StartNode); INITIALIZER(runCreateBank); STEP(runBankTimer); STEP(runBankTransactions); @@ -233,7 +377,31 @@ TESTCASE("Abort", STEP(runBankTransactions); STEP(runBankTransactions); STEP(runBankGL); - STEP(runSR); + STEP(runMixRestart); + FINALIZER(runDropBank); +} +TESTCASE("Mix", + " Test that a consistent bank is restored after graceful shutdown\n" + "1. Create bank\n" + "2. Start bank and let it run\n" + "3. Restart ndb and verify consistency\n" + "4. 
Drop bank\n") +{ + TC_PROPERTY("Type", ~0); + INITIALIZER(runCreateBank); + STEP(runBankTimer); + STEP(runBankTransactions); + STEP(runBankTransactions); + STEP(runBankTransactions); + STEP(runBankTransactions); + STEP(runBankTransactions); + STEP(runBankTransactions); + STEP(runBankTransactions); + STEP(runBankTransactions); + STEP(runBankTransactions); + STEP(runBankTransactions); + STEP(runBankGL); + STEP(runMixRestart); FINALIZER(runDropBank); } NDBT_TESTSUITE_END(testSRBank); @@ -243,4 +411,4 @@ int main(int argc, const char** argv){ return testSRBank.execute(argc, argv); } - +template class Vector<ndb_mgm_node_state*>; diff --git a/storage/ndb/test/ndbapi/testScan.cpp b/storage/ndb/test/ndbapi/testScan.cpp index 2802f1c950e..ee2dd56c571 100644 --- a/storage/ndb/test/ndbapi/testScan.cpp +++ b/storage/ndb/test/ndbapi/testScan.cpp @@ -1125,6 +1125,88 @@ runScanParallelism(NDBT_Context* ctx, NDBT_Step* step){ return NDBT_OK; } +int +runScanVariants(NDBT_Context* ctx, NDBT_Step* step) +{ + int loops = ctx->getNumLoops(); + int records = ctx->getNumRecords(); + Ndb * pNdb = GETNDB(step); + const NdbDictionary::Table* pTab = ctx->getTab(); + + HugoCalculator calc(* pTab); + NDBT_ResultRow tmpRow(* pTab); + + for(int lm = 0; lm <= NdbOperation::LM_CommittedRead; lm++) + { + for(int flags = 0; flags < 4; flags++) + { + for (int par = 0; par < 16; par += 1 + (rand() % 3)) + { + bool disk = flags & 1; + bool tups = flags & 2; + g_info << "lm: " << lm + << " disk: " << disk + << " tup scan: " << tups + << " par: " << par + << endl; + + NdbConnection* pCon = pNdb->startTransaction(); + NdbScanOperation* pOp = pCon->getNdbScanOperation(pTab->getName()); + if (pOp == NULL) { + ERR(pCon->getNdbError()); + return NDBT_FAILED; + } + + if( pOp->readTuples((NdbOperation::LockMode)lm, + tups ? 
NdbScanOperation::SF_TupScan : 0, + par) != 0) + { + ERR(pCon->getNdbError()); + return NDBT_FAILED; + } + + int check = pOp->interpret_exit_ok(); + if( check == -1 ) { + ERR(pCon->getNdbError()); + return NDBT_FAILED; + } + + // Define attributes to read + bool found_disk = false; + for(int a = 0; a<pTab->getNoOfColumns(); a++){ + if (pTab->getColumn(a)->getStorageType() == NdbDictionary::Column::StorageTypeDisk) + { + found_disk = true; + if (!disk) + continue; + } + + if((pOp->getValue(pTab->getColumn(a)->getName())) == 0) { + ERR(pCon->getNdbError()); + return NDBT_FAILED; + } + } + + if (! (disk && !found_disk)) + { + check = pCon->execute(NoCommit); + if( check == -1 ) { + ERR(pCon->getNdbError()); + return NDBT_FAILED; + } + + int res; + int row = 0; + while((res = pOp->nextResult()) == 0); + } + pCon->close(); + } + } + } + + return NDBT_OK; +} + NDBT_TESTSUITE(testScan); TESTCASE("ScanRead", "Verify scan requirement: It should be possible "\ @@ -1162,17 +1244,6 @@ TESTCASE("ScanReadCommitted240", STEP(runScanReadCommitted); FINALIZER(runClearTable); } -TESTCASE("ScanTupReadCommitted240", - "Verify scan requirement: It should be possible to scan read committed with "\ - "parallelism, test with parallelism 240(240 would automatically be "\ - "downgraded to the maximum parallelism value for the current config). 
"\ - "Scans TUP pages directly without using ACC."){ - INITIALIZER(runLoadTable); - TC_PROPERTY("Parallelism", 240); - TC_PROPERTY("TupScan", 1); - STEP(runScanReadCommitted); - FINALIZER(runClearTable); -} TESTCASE("ScanUpdate", "Verify scan requirement: It should be possible "\ "to update all records in a table without knowing their"\ @@ -1603,6 +1674,12 @@ TESTCASE("ScanParallelism", STEP(runScanParallelism); FINALIZER(runClearTable); } +TESTCASE("ScanVariants", + "Test different scan variants"){ + INITIALIZER(runLoadTable); + STEP(runScanVariants); + FINALIZER(runClearTable); +} NDBT_TESTSUITE_END(testScan); int main(int argc, const char** argv){ diff --git a/storage/ndb/test/ndbapi/testTransactions.cpp b/storage/ndb/test/ndbapi/testTransactions.cpp index 46be808d8a5..3de088a5818 100644 --- a/storage/ndb/test/ndbapi/testTransactions.cpp +++ b/storage/ndb/test/ndbapi/testTransactions.cpp @@ -359,6 +359,11 @@ runClearTable(NDBT_Context* ctx, NDBT_Step* step){ if (utilTrans.clearTable2(GETNDB(step), records, 240) != 0){ return NDBT_FAILED; } + + NdbRestarter r; + int lcp = 7099; + r.dumpStateAllNodes(&lcp, 1); + return NDBT_OK; } diff --git a/storage/ndb/test/ndbapi/test_event.cpp b/storage/ndb/test/ndbapi/test_event.cpp index 1bb614c1c8b..87065e754b8 100644 --- a/storage/ndb/test/ndbapi/test_event.cpp +++ b/storage/ndb/test/ndbapi/test_event.cpp @@ -343,6 +343,16 @@ int runCreateShadowTable(NDBT_Context* ctx, NDBT_Step* step) return NDBT_FAILED; } +int runDropShadowTable(NDBT_Context* ctx, NDBT_Step* step) +{ + const NdbDictionary::Table *table= ctx->getTab(); + char buf[1024]; + sprintf(buf, "%s_SHADOW", table->getName()); + + GETNDB(step)->getDictionary()->dropTable(buf); + return NDBT_OK; +} + int runCreateDropEventOperation(NDBT_Context* ctx, NDBT_Step* step) { int loops = ctx->getNumLoops(); @@ -1443,6 +1453,7 @@ TESTCASE("EventOperationApplier", STEP(runEventMixedLoad); FINALIZER(runDropEvent); FINALIZER(runVerify); + FINALIZER(runDropShadowTable); } 
TESTCASE("EventOperationApplier_NR", "Verify that if we apply the data we get from event " @@ -1455,6 +1466,7 @@ TESTCASE("EventOperationApplier_NR", STEP(runRestarter); FINALIZER(runDropEvent); FINALIZER(runVerify); + FINALIZER(runDropShadowTable); } TESTCASE("Multi", "Verify that we can work with all tables in parallell" diff --git a/storage/ndb/test/run-test/conf-daily-basic-ndbmaster.txt b/storage/ndb/test/run-test/conf-daily-basic-ndbmaster.txt index bcd809593f3..30e90a9527f 100644 --- a/storage/ndb/test/run-test/conf-daily-basic-ndbmaster.txt +++ b/storage/ndb/test/run-test/conf-daily-basic-ndbmaster.txt @@ -1,5 +1,5 @@ baseport: 14000 -basedir: /space/autotest +basedir: CHOOSE_dir mgm: CHOOSE_host1 ndb: CHOOSE_host2 CHOOSE_host3 CHOOSE_host2 CHOOSE_host3 api: CHOOSE_host1 CHOOSE_host1 CHOOSE_host1 @@ -11,7 +11,7 @@ DataMemory: 300M BackupMemory: 64M MaxNoOfConcurrentScans: 100 DataDir: . -FileSystemPath: /space/autotest/run +FileSystemPath: CHOOSE_dir/run [MGM DEFAULT] PortNumber: 14000 diff --git a/storage/ndb/test/run-test/conf-daily-devel-ndbmaster.txt b/storage/ndb/test/run-test/conf-daily-devel-ndbmaster.txt index 8b340e6a39d..8adbf84454d 100644 --- a/storage/ndb/test/run-test/conf-daily-devel-ndbmaster.txt +++ b/storage/ndb/test/run-test/conf-daily-devel-ndbmaster.txt @@ -1,5 +1,5 @@ baseport: 16000 -basedir: /space/autotest +basedir: CHOOSE_dir mgm: CHOOSE_host1 ndb: CHOOSE_host2 CHOOSE_host3 CHOOSE_host2 CHOOSE_host3 api: CHOOSE_host1 CHOOSE_host1 CHOOSE_host1 @@ -11,7 +11,7 @@ DataMemory: 300M BackupMemory: 64M MaxNoOfConcurrentScans: 100 DataDir: . 
-FileSystemPath: /space/autotest/run +FileSystemPath: CHOOSE_dir/run [MGM DEFAULT] PortNumber: 16000 diff --git a/storage/ndb/test/run-test/daily-basic-tests.txt b/storage/ndb/test/run-test/daily-basic-tests.txt index 42efcbfce6c..6dbb078e0d2 100644 --- a/storage/ndb/test/run-test/daily-basic-tests.txt +++ b/storage/ndb/test/run-test/daily-basic-tests.txt @@ -208,6 +208,10 @@ cmd: testBasic args: -n MassiveRollback2 T1 T6 T13 D1 D2 max-time: 500 +cmd: testBasic +args: -n TupError + +max-time: 500 cmd: testTimeout args: T1 @@ -311,11 +315,19 @@ args: -n ScanUpdateAbort16 T6 D1 D2 max-time: 3600 cmd: testScan -args: -n ScanReadRestart T1 T6 T13 D1 D2 +args: -n ScanReadRestart T1 T6 T13 -max-time: 500 +max-time: 3600 +cmd: testScan +args: -n ScanReadRestart D1 D2 + +max-time: 1200 +cmd: testScan +args: -n ScanUpdateRestart T6 + +max-time: 1200 cmd: testScan -args: -n ScanUpdateRestart T6 D1 D2 +args: -n ScanUpdateRestart D1 D2 max-time: 500 cmd: testScan @@ -397,9 +409,13 @@ max-time: 500 cmd: testScan args: -n CheckAfterTerror T1 D1 D2 -max-time: 500 +max-time: 1200 +cmd: testScan +args: -n ScanReadWhileNodeIsDown T1 + +max-time: 1200 cmd: testScan -args: -n ScanReadWhileNodeIsDown T1 D1 D2 +args: -n ScanReadWhileNodeIsDown D1 D2 max-time: 500 cmd: testScan @@ -414,6 +430,10 @@ cmd: testScan args: -n ScanParallelism max-time: 500 +cmd: testScan +args: -n ScanVariants + +max-time: 500 cmd: testNodeRestart args: -n Bug15587 T1 @@ -560,7 +580,7 @@ max-time: 1500 cmd: testRestartGci args: T6 -max-time: 600 +max-time: 1500 cmd: testBlobs args: @@ -640,6 +660,18 @@ max-time: 1500 cmd: testSystemRestart args: -n SR_UNDO T8 +max-time: 1000 +cmd: testSRBank +args: -n SR -l 300 -r 15 T1 + +max-time: 1000 +cmd: testSRBank +args: -n NR -l 300 -r 15 T1 + +max-time: 1000 +cmd: testSRBank +args: -n Mix -l 300 -r 15 T1 + # OLD FLEX max-time: 500 cmd: flexBench diff --git a/storage/ndb/test/run-test/daily-devel-tests.txt b/storage/ndb/test/run-test/daily-devel-tests.txt index 
67cf25a6f4d..34e914644fc 100644 --- a/storage/ndb/test/run-test/daily-devel-tests.txt +++ b/storage/ndb/test/run-test/daily-devel-tests.txt @@ -206,12 +206,12 @@ args: -l 1 -n SR9 T1 # max-time: 2500 cmd: test_event -args: -n EventOperationApplier +args: -n EventOperationApplier -l 2 # max-time: 2500 cmd: test_event -args: -n EventOperationApplier_NR +args: -n EventOperationApplier_NR -l 2 # max-time: 2500 diff --git a/storage/ndb/test/run-test/ndb-autotest.sh b/storage/ndb/test/run-test/ndb-autotest.sh index 4228d2354d3..6f94ef6e46c 100755 --- a/storage/ndb/test/run-test/ndb-autotest.sh +++ b/storage/ndb/test/run-test/ndb-autotest.sh @@ -35,6 +35,7 @@ report=yes clone=5.0-ndb RUN="daily-basic daily-devel" conf=autotest.conf +LOCK=$HOME/.autotest-lock ############################ # Read command line entries# @@ -105,7 +106,6 @@ fi # Setup the clone source location # #################################### -LOCK=$HOME/.autotest-lock src_clone=$src_clone_base-$clone ####################################### @@ -386,7 +386,8 @@ do awk '{for(i=1;i<='$count';i++)print $i;}'` echo $run_hosts >> /tmp/filter_hosts.$$ - choose $conf $run_hosts > d.tmp + choose $conf $run_hosts > d.tmp.$$ + sed -e s,CHOOSE_dir,"$install_dir",g < d.tmp.$$ > d.tmp $mkconfig d.tmp fi diff --git a/storage/ndb/test/src/NDBT_Table.cpp b/storage/ndb/test/src/NDBT_Table.cpp index 8d398b75d81..039dd3b8ddc 100644 --- a/storage/ndb/test/src/NDBT_Table.cpp +++ b/storage/ndb/test/src/NDBT_Table.cpp @@ -32,6 +32,8 @@ operator <<(class NdbOut& ndbout, const NDBT_Table & tab) ndbout << "Number of attributes: " << tab.getNoOfColumns() << endl; ndbout << "Number of primary keys: " << tab.getNoOfPrimaryKeys() << endl; ndbout << "Length of frm data: " << tab.getFrmLength() << endl; + ndbout << "Row Checksum: " << tab.getRowChecksumIndicator() << endl; + ndbout << "Row GCI: " << tab.getRowGCIIndicator() << endl; //<< ((tab.getTupleKey() == TupleId) ? 
" tupleid" : "") <<endl; diff --git a/storage/ndb/test/tools/hugoLoad.cpp b/storage/ndb/test/tools/hugoLoad.cpp index 2982e75cbf1..d5abfa65d03 100644 --- a/storage/ndb/test/tools/hugoLoad.cpp +++ b/storage/ndb/test/tools/hugoLoad.cpp @@ -62,6 +62,13 @@ int main(int argc, const char** argv){ { return NDBT_ProgramExit(NDBT_FAILED); } + + if (con.wait_until_ready(30,0) < 0) + { + ndbout << "Cluster nodes not ready in 30 seconds." << endl; + return NDBT_ProgramExit(NDBT_FAILED); + } + Ndb MyNdb( &con, db ? db : "TEST_DB" ); if(MyNdb.init() != 0){ @@ -69,10 +76,6 @@ int main(int argc, const char** argv){ return NDBT_ProgramExit(NDBT_FAILED); } - // Connect to Ndb and wait for it to become ready - while(MyNdb.waitUntilReady() != 0) - ndbout << "Waiting for ndb to become ready..." << endl; - for(Uint32 i = optind; i<argc; i++) { const char* _tabname = argv[i]; diff --git a/storage/ndb/test/tools/hugoPkDelete.cpp b/storage/ndb/test/tools/hugoPkDelete.cpp index 84e7ded0add..6dd9fb4a7de 100644 --- a/storage/ndb/test/tools/hugoPkDelete.cpp +++ b/storage/ndb/test/tools/hugoPkDelete.cpp @@ -60,6 +60,14 @@ int main(int argc, const char** argv){ { return NDBT_ProgramExit(NDBT_FAILED); } + + if (con.wait_until_ready(30,0) < 0) + { + ndbout << "Cluster nodes not ready in 30 seconds." << endl; + return NDBT_ProgramExit(NDBT_FAILED); + } + + Ndb MyNdb(&con, "TEST_DB" ); if(MyNdb.init() != 0){ @@ -67,9 +75,6 @@ int main(int argc, const char** argv){ return NDBT_ProgramExit(NDBT_FAILED); } - while(MyNdb.waitUntilReady() != 0) - ndbout << "Waiting for ndb to become ready..." 
<< endl; - // Check if table exists in db const NdbDictionary::Table * pTab = NDBT_Table::discoverTableFromDb(&MyNdb, _tabname); if(pTab == NULL){ diff --git a/storage/ndb/test/tools/hugoPkUpdate.cpp b/storage/ndb/test/tools/hugoPkUpdate.cpp index 7d46ae95c29..8cad006d12a 100644 --- a/storage/ndb/test/tools/hugoPkUpdate.cpp +++ b/storage/ndb/test/tools/hugoPkUpdate.cpp @@ -63,6 +63,13 @@ int main(int argc, const char** argv){ { return NDBT_ProgramExit(NDBT_FAILED); } + + if (con.wait_until_ready(30,0) < 0) + { + ndbout << "Cluster nodes not ready in 30 seconds." << endl; + return NDBT_ProgramExit(NDBT_FAILED); + } + Ndb MyNdb( &con, db ? db : "TEST_DB" ); if(MyNdb.init() != 0){ @@ -70,9 +77,6 @@ int main(int argc, const char** argv){ return NDBT_ProgramExit(NDBT_FAILED); } - while(MyNdb.waitUntilReady() != 0) - ndbout << "Waiting for ndb to become ready..." << endl; - // Check if table exists in db const NdbDictionary::Table * pTab = NDBT_Table::discoverTableFromDb(&MyNdb, _tabname); if(pTab == NULL){ diff --git a/storage/ndb/test/tools/hugoScanUpdate.cpp b/storage/ndb/test/tools/hugoScanUpdate.cpp index 07807b254ee..f35e2df0f71 100644 --- a/storage/ndb/test/tools/hugoScanUpdate.cpp +++ b/storage/ndb/test/tools/hugoScanUpdate.cpp @@ -66,6 +66,13 @@ int main(int argc, const char** argv){ { return NDBT_ProgramExit(NDBT_FAILED); } + + if (con.wait_until_ready(30,0) < 0) + { + ndbout << "Cluster nodes not ready in 30 seconds." << endl; + return NDBT_ProgramExit(NDBT_FAILED); + } + Ndb MyNdb( &con, db ? db : "TEST_DB" ); if(MyNdb.init() != 0){ @@ -73,9 +80,6 @@ int main(int argc, const char** argv){ return NDBT_ProgramExit(NDBT_FAILED); } - while(MyNdb.waitUntilReady() != 0) - ndbout << "Waiting for ndb to become ready..." 
<< endl; - // Check if table exists in db const NdbDictionary::Table * pTab = NDBT_Table::discoverTableFromDb(&MyNdb, _tabname); if(pTab == NULL){ diff --git a/storage/ndb/tools/desc.cpp b/storage/ndb/tools/desc.cpp index 06accb591bf..bbd69baacd2 100644 --- a/storage/ndb/tools/desc.cpp +++ b/storage/ndb/tools/desc.cpp @@ -21,8 +21,8 @@ void desc_AutoGrowSpecification(struct NdbDictionary::AutoGrowSpecification ags); int desc_logfilegroup(Ndb *myndb, char* name); -int desc_undofile(Ndb *myndb, char* name); -int desc_datafile(Ndb *myndb, char* name); +int desc_undofile(Ndb_cluster_connection &con, Ndb *myndb, char* name); +int desc_datafile(Ndb_cluster_connection &con, Ndb *myndb, char* name); int desc_tablespace(Ndb *myndb,char* name); int desc_table(Ndb *myndb,char* name); @@ -91,9 +91,9 @@ int main(int argc, char** argv){ ; else if(desc_logfilegroup(&MyNdb,argv[i])) ; - else if(desc_datafile(&MyNdb, argv[i])) + else if(desc_datafile(con, &MyNdb, argv[i])) ; - else if(desc_undofile(&MyNdb, argv[i])) + else if(desc_undofile(con, &MyNdb, argv[i])) ; else ndbout << "No such object: " << argv[i] << endl << endl; @@ -123,6 +123,7 @@ int desc_logfilegroup(Ndb *myndb, char* name) ndbout << "Name: " << lfg.getName() << endl; ndbout << "UndoBuffer size: " << lfg.getUndoBufferSize() << endl; ndbout << "Version: " << lfg.getObjectVersion() << endl; + ndbout << "Free Words: " << lfg.getUndoFreeWords() << endl; desc_AutoGrowSpecification(lfg.getAutoGrowSpecification()); @@ -149,62 +150,74 @@ int desc_tablespace(Ndb *myndb, char* name) return 1; } -int desc_undofile(Ndb *myndb, char* name) +int desc_undofile(Ndb_cluster_connection &con, Ndb *myndb, char* name) { + unsigned id; NdbDictionary::Dictionary *dict= myndb->getDictionary(); + Ndb_cluster_connection_node_iter iter; + assert(dict); - NdbDictionary::Undofile uf= dict->getUndofile(0, name); - NdbError err= dict->getNdbError(); - if(err.classification!=ndberror_cl_none) - return 0; - ndbout << "Type: Undofile" << endl; - 
ndbout << "Name: " << name << endl; - ndbout << "Path: " << uf.getPath() << endl; - ndbout << "Size: " << uf.getSize() << endl; - ndbout << "Free: " << uf.getFree() << endl; + con.init_get_next_node(iter); - ndbout << "Logfile Group: " << uf.getLogfileGroup() << endl; + while(id= con.get_next_node(iter)) + { + NdbDictionary::Undofile uf= dict->getUndofile(0, name); + NdbError err= dict->getNdbError(); + if(err.classification!=ndberror_cl_none) + return 0; - /** FIXME: are these needed, the functions aren't there - but the prototypes are... + ndbout << "Type: Undofile" << endl; + ndbout << "Name: " << name << endl; + ndbout << "Node: " << id << endl; + ndbout << "Path: " << uf.getPath() << endl; + ndbout << "Size: " << uf.getSize() << endl; - ndbout << "Node: " << uf.getNode() << endl; + ndbout << "Logfile Group: " << uf.getLogfileGroup() << endl; - ndbout << "Number: " << uf.getFileNo() << endl; - */ + /** FIXME: are these needed, the functions aren't there + but the prototypes are... - ndbout << endl; + ndbout << "Number: " << uf.getFileNo() << endl; + */ + + ndbout << endl; + } return 1; } -int desc_datafile(Ndb *myndb, char* name) +int desc_datafile(Ndb_cluster_connection &con, Ndb *myndb, char* name) { + unsigned id; NdbDictionary::Dictionary *dict= myndb->getDictionary(); assert(dict); - NdbDictionary::Datafile df= dict->getDatafile(0, name); - NdbError err= dict->getNdbError(); - if(err.classification!=ndberror_cl_none) - return 0; + Ndb_cluster_connection_node_iter iter; - ndbout << "Type: Datafile" << endl; - ndbout << "Name: " << name << endl; - ndbout << "Path: " << df.getPath() << endl; - ndbout << "Size: " << df.getSize() << endl; - ndbout << "Free: " << df.getFree() << endl; + con.init_get_next_node(iter); - ndbout << "Tablespace: " << df.getTablespace() << endl; + while(id= con.get_next_node(iter)) + { + NdbDictionary::Datafile df= dict->getDatafile(id, name); + NdbError err= dict->getNdbError(); + if(err.classification!=ndberror_cl_none) + return 0; 
- /** FIXME: are these needed, the functions aren't there - but the prototypes are... + ndbout << "Type: Datafile" << endl; + ndbout << "Name: " << name << endl; + ndbout << "Node: " << id << endl; + ndbout << "Path: " << df.getPath() << endl; + ndbout << "Size: " << df.getSize() << endl; + ndbout << "Free: " << df.getFree() << endl; - ndbout << "Node: " << uf.getNode() << endl; + ndbout << "Tablespace: " << df.getTablespace() << endl; - ndbout << "Number: " << uf.getFileNo() << endl; - */ + /** We probably don't need to display this ever... + ndbout << "Number: " << uf.getFileNo() << endl; + */ - ndbout << endl; + ndbout << endl; + } return 1; } diff --git a/storage/ndb/tools/ndb_error_reporter b/storage/ndb/tools/ndb_error_reporter new file mode 100644 index 00000000000..2b5aadb6171 --- /dev/null +++ b/storage/ndb/tools/ndb_error_reporter @@ -0,0 +1,88 @@ +#!/usr/bin/perl -w + +use strict; + +if(@ARGV < 1) +{ + print STDERR "Usage:\n"; + print STDERR "\tndb_error_reporter config.ini [username] [--fs]\n\n"; + print STDERR "\tusername is a user that you can use to ssh into\n"; + print STDERR "\t all of your nodes with.\n\n"; + print STDERR "\t--fs means include the filesystems in the report\n"; + print STDERR "\t WARNING: This may require a lot of disk space.\n"; + print STDERR "\t Only use this option when asked to.\n\n"; + exit(1); +} + +my $config_file= $ARGV[0]; +my $config_get_fs= 0; +my $config_username= ''; +if(defined($ARGV[1])) +{ + $config_get_fs= 1 if $ARGV[1] eq '--fs'; + $config_username= $ARGV[1].'@' if $ARGV[1] ne '--fs'; + $config_get_fs= (defined $ARGV[2] && $ARGV[2] eq '--fs')?1:$config_get_fs; +} + +if(!stat($config_file)) +{ + print STDERR "Cannot open configuration file.\n\n"; + exit(1); +} + +my @nodes= split ' ',`ndb_config --config-file=$ARGV[0] --nodes --query=id --type=ndbd`; + +push @nodes, split ' ',`ndb_config --config-file=$ARGV[0] --nodes --query=id --type=ndb_mgmd`; + +sub config { + my $nodeid= shift; + my $query= shift; + my $res= 
`ndb_config --config-file=$ARGV[0] --id=$nodeid --query=$query`; + chomp $res; + $res; +} + +my @t= localtime(); +my $reportdir= sprintf('ndb_error_report_%u%02u%02u%02u%02u%02u', + ($t[5]+1900),($t[4]+1),$t[3],$t[2],$t[1],$t[0]); + +if(stat($reportdir) || stat($reportdir.'tar.bz2')) +{ + print STDERR "It looks like another ndb_error_report process is running.\n"; + print STDERR "If that is not the case, remove the ndb_error_report directory"; + print STDERR " and run ndb_error_report again.\n\n"; + exit(1); +} + +mkdir($reportdir); + +foreach my $node (@nodes) +{ + print "\n\n Copying data from node $node". + (($config_get_fs)?" with filesystem":""). + "\n\n"; + my $recurse= ($config_get_fs)?'-r ':''; + system 'scp '.$recurse.$config_username.config($node,'host'). + ':'.config($node,'datadir')."/ndb_".$node."* ". + "$reportdir/\n"; +} + +print "\n\n Copying configuration file...\n\n\t$config_file\n\n"; +system "cp $config_file $reportdir/"; + +my $r = system 'bzip2 2>&1 > /dev/null < /dev/null'; +my $outfile; +if($r==0) +{ + $outfile= "$reportdir.tar.bz2"; + system "tar c $reportdir|bzip2 > $outfile"; +} +else +{ + $outfile= "$reportdir.tar.gz"; + system "tar c $reportdir|gzip > $outfile"; +} + +system "rm -rf $reportdir"; + +print "\n\nPlease attach $outfile to your error report\n\n"; diff --git a/storage/ndb/tools/restore/Restore.cpp b/storage/ndb/tools/restore/Restore.cpp index cdba4a63824..c60cf782fc8 100644 --- a/storage/ndb/tools/restore/Restore.cpp +++ b/storage/ndb/tools/restore/Restore.cpp @@ -140,27 +140,137 @@ RestoreMetaData::readMetaTableList() { bool RestoreMetaData::readMetaTableDesc() { - Uint32 sectionInfo[2]; + Uint32 sectionInfo[3]; // Read section header - if (buffer_read(§ionInfo, sizeof(sectionInfo), 1) != 1){ + Uint32 sz = sizeof(sectionInfo) >> 2; + if (m_fileHeader.NdbVersion < NDBD_ROWID_VERSION) + { + sz = 2; + sectionInfo[2] = htonl(DictTabInfo::UserTable); + } + if (buffer_read(§ionInfo, 4*sz, 1) != 1){ err << "readMetaTableDesc read 
header error" << endl; return false; } // if sectionInfo[0] = ntohl(sectionInfo[0]); sectionInfo[1] = ntohl(sectionInfo[1]); + sectionInfo[2] = ntohl(sectionInfo[2]); assert(sectionInfo[0] == BackupFormat::TABLE_DESCRIPTION); // Read dictTabInfo buffer - const Uint32 len = (sectionInfo[1] - 2); + const Uint32 len = (sectionInfo[1] - sz); void *ptr; if (buffer_get_ptr(&ptr, 4, len) != len){ err << "readMetaTableDesc read error" << endl; return false; } // if - return parseTableDescriptor((Uint32*)ptr, len); + int errcode = 0; + DictObject obj = { sectionInfo[2], 0 }; + switch(obj.m_objType){ + case DictTabInfo::SystemTable: + case DictTabInfo::UserTable: + case DictTabInfo::UniqueHashIndex: + case DictTabInfo::OrderedIndex: + return parseTableDescriptor((Uint32*)ptr, len); + break; + case DictTabInfo::Tablespace: + { + NdbDictionary::Tablespace * dst = new NdbDictionary::Tablespace; + errcode = + NdbDictInterface::parseFilegroupInfo(NdbTablespaceImpl::getImpl(* dst), + (Uint32*)ptr, len); + if (errcode) + delete dst; + obj.m_objPtr = dst; + debug << hex << obj.m_objPtr << " " + << dec << dst->getObjectId() << " " << dst->getName() << endl; + break; + } + case DictTabInfo::LogfileGroup: + { + NdbDictionary::LogfileGroup * dst = new NdbDictionary::LogfileGroup; + errcode = + NdbDictInterface::parseFilegroupInfo(NdbLogfileGroupImpl::getImpl(* dst), + (Uint32*)ptr, len); + if (errcode) + delete dst; + obj.m_objPtr = dst; + debug << hex << obj.m_objPtr << " " + << dec << dst->getObjectId() << " " << dst->getName() << endl; + break; + } + case DictTabInfo::Datafile: + { + NdbDictionary::Datafile * dst = new NdbDictionary::Datafile; + errcode = + NdbDictInterface::parseFileInfo(NdbDatafileImpl::getImpl(* dst), + (Uint32*)ptr, len); + if (errcode) + delete dst; + obj.m_objPtr = dst; + debug << hex << obj.m_objPtr << " " + << dec << dst->getObjectId() << " " << dst->getPath() << endl; + break; + } + case DictTabInfo::Undofile: + { + NdbDictionary::Undofile * dst = new 
NdbDictionary::Undofile; + errcode = + NdbDictInterface::parseFileInfo(NdbUndofileImpl::getImpl(* dst), + (Uint32*)ptr, len); + if (errcode) + delete dst; + obj.m_objPtr = dst; + debug << hex << obj.m_objPtr << " " + << dec << dst->getObjectId() << " " << dst->getPath() << endl; + break; + } + default: + err << "Unsupported table type!! " << sectionInfo[2] << endl; + return false; + } + if (errcode) + { + err << "Unable to parse dict info..." + << sectionInfo[2] << " " << errcode << endl; + return false; + } + + /** + * DD objects need to be sorted... + */ + for(Uint32 i = 0; i<m_objects.size(); i++) + { + switch(sectionInfo[2]){ + case DictTabInfo::Tablespace: + if (DictTabInfo::isFile(m_objects[i].m_objType)) + { + m_objects.push(obj, i); + goto end; + } + break; + case DictTabInfo::LogfileGroup: + { + if (DictTabInfo::isFile(m_objects[i].m_objType) || + m_objects[i].m_objType == DictTabInfo::Tablespace) + { + m_objects.push(obj, i); + goto end; + } + break; + } + default: + m_objects.push_back(obj); + goto end; + } + } + m_objects.push_back(obj); + +end: + return true; } bool @@ -217,7 +327,7 @@ RestoreMetaData::parseTableDescriptor(const Uint32 * data, Uint32 len) { NdbTableImpl* tableImpl = 0; int ret = NdbDictInterface::parseTableInfo(&tableImpl, data, len, false); - + if (ret != 0) { err << "parseTableInfo " << " failed" << endl; return false; @@ -960,4 +1070,4 @@ operator<<(NdbOut& ndbout, const TableS & table){ template class Vector<TableS*>; template class Vector<AttributeS*>; template class Vector<AttributeDesc*>; - +template class Vector<DictObject>; diff --git a/storage/ndb/tools/restore/Restore.hpp b/storage/ndb/tools/restore/Restore.hpp index 85793baf9df..2c821c998bc 100644 --- a/storage/ndb/tools/restore/Restore.hpp +++ b/storage/ndb/tools/restore/Restore.hpp @@ -267,6 +267,11 @@ public: bool Twiddle(const AttributeDesc * attr_desc, AttributeData * attr_data, Uint32 arraySize = 0); }; +struct DictObject { + Uint32 m_objType; + void * m_objPtr; +}; + 
class RestoreMetaData : public BackupFile { Vector<TableS *> allTables; @@ -281,6 +286,8 @@ class RestoreMetaData : public BackupFile { bool parseTableDescriptor(const Uint32 * data, Uint32 len); + Vector<DictObject> m_objects; + public: RestoreMetaData(const char * path, Uint32 nodeId, Uint32 bNo); virtual ~RestoreMetaData(); @@ -292,6 +299,10 @@ public: const TableS * operator[](int i) const { return allTables[i];} TableS * getTable(Uint32 tableId) const; + Uint32 getNoOfObjects() const { return m_objects.size();} + Uint32 getObjType(Uint32 i) const { return m_objects[i].m_objType; } + void* getObjPtr(Uint32 i) const { return m_objects[i].m_objPtr; } + Uint32 getStopGCP() const; }; // RestoreMetaData diff --git a/storage/ndb/tools/restore/consumer.hpp b/storage/ndb/tools/restore/consumer.hpp index 692c814159f..9cfbd3ca592 100644 --- a/storage/ndb/tools/restore/consumer.hpp +++ b/storage/ndb/tools/restore/consumer.hpp @@ -23,6 +23,7 @@ class BackupConsumer { public: virtual ~BackupConsumer() { } virtual bool init() { return true;} + virtual bool object(Uint32 tableType, const void*) { return true;} virtual bool table(const TableS &){return true;} virtual bool endOfTables() { return true; } virtual void tuple(const TupleS &){} diff --git a/storage/ndb/tools/restore/consumer_restore.cpp b/storage/ndb/tools/restore/consumer_restore.cpp index 620e4702b15..fd9daf2856d 100644 --- a/storage/ndb/tools/restore/consumer_restore.cpp +++ b/storage/ndb/tools/restore/consumer_restore.cpp @@ -152,6 +152,141 @@ BackupRestore::finalize_table(const TableS & table){ return ret; } +#include <signaldata/DictTabInfo.hpp> + +bool +BackupRestore::object(Uint32 type, const void * ptr) +{ + if (!m_restore_meta) + return true; + + NdbDictionary::Dictionary* dict = m_ndb->getDictionary(); + switch(type){ + case DictTabInfo::Tablespace: + { + NdbDictionary::Tablespace old(*(NdbDictionary::Tablespace*)ptr); + + Uint32 id = old.getObjectId(); + + if (!m_no_restore_disk) + { + 
NdbDictionary::LogfileGroup * lg = m_logfilegroups[old.getDefaultLogfileGroupId()]; + old.setDefaultLogfileGroup(* lg); + int ret = dict->createTablespace(old); + if (ret) + { + NdbError errobj= dict->getNdbError(); + err << "Failed to create tablespace \"" << old.getName() << "\": " + << errobj << endl; + return false; + } + debug << "Created tablespace: " << old.getName() << endl; + } + + NdbDictionary::Tablespace curr = dict->getTablespace(old.getName()); + NdbError errobj = dict->getNdbError(); + if(errobj.classification == ndberror_cl_none) + { + NdbDictionary::Tablespace* currptr = new NdbDictionary::Tablespace(curr); + NdbDictionary::Tablespace * null = 0; + m_tablespaces.set(currptr, id, null); + debug << "Retreived tablspace: " << currptr->getName() + << " oldid: " << id << " newid: " << currptr->getObjectId() + << " " << (void*)currptr << endl; + return true; + } + + err << "Failed to retrieve tablespace \"" << old.getName() << "\": " + << errobj << endl; + + return false; + break; + } + case DictTabInfo::LogfileGroup: + { + NdbDictionary::LogfileGroup old(*(NdbDictionary::LogfileGroup*)ptr); + + Uint32 id = old.getObjectId(); + + if (!m_no_restore_disk) + { + int ret = dict->createLogfileGroup(old); + if (ret) + { + NdbError errobj= dict->getNdbError(); + err << "Failed to create logfile group \"" << old.getName() << "\": " + << errobj << endl; + return false; + } + debug << "Created logfile group: " << old.getName() << endl; + } + + NdbDictionary::LogfileGroup curr = dict->getLogfileGroup(old.getName()); + NdbError errobj = dict->getNdbError(); + if(errobj.classification == ndberror_cl_none) + { + NdbDictionary::LogfileGroup* currptr = + new NdbDictionary::LogfileGroup(curr); + NdbDictionary::LogfileGroup * null = 0; + m_logfilegroups.set(currptr, id, null); + debug << "Retreived logfile group: " << currptr->getName() + << " oldid: " << id << " newid: " << currptr->getObjectId() + << " " << (void*)currptr << endl; + return true; + } + + err << "Failed 
to retrieve logfile group \"" << old.getName() << "\": " + << errobj << endl; + + return false; + break; + } + case DictTabInfo::Datafile: + { + if (!m_no_restore_disk) + { + NdbDictionary::Datafile old(*(NdbDictionary::Datafile*)ptr); + NdbDictionary::Tablespace * ts = m_tablespaces[old.getTablespaceId()]; + debug << "Connecting datafile " << old.getPath() + << " to tablespace: oldid: " << old.getTablespaceId() + << " newid: " << ts->getObjectId() << endl; + old.setTablespace(* ts); + if (dict->createDatafile(old)) + { + err << "Failed to create datafile \"" << old.getPath() << "\": " + << dict->getNdbError() << endl; + return false; + } + } + return true; + break; + } + case DictTabInfo::Undofile: + { + if (!m_no_restore_disk) + { + NdbDictionary::Undofile old(*(NdbDictionary::Undofile*)ptr); + NdbDictionary::LogfileGroup * lg = + m_logfilegroups[old.getLogfileGroupId()]; + debug << "Connecting undofile " << old.getPath() + << " to logfile group: oldid: " << old.getLogfileGroupId() + << " newid: " << lg->getObjectId() + << " " << (void*)lg << endl; + old.setLogfileGroup(* lg); + if (dict->createUndofile(old)) + { + err << "Failed to create undofile \"" << old.getPath() << "\": " + << dict->getNdbError() << endl; + return false; + } + } + return true; + break; + } + } + return true; +} + bool BackupRestore::table(const TableS & table){ if (!m_restore && !m_restore_meta) @@ -186,7 +321,15 @@ BackupRestore::table(const TableS & table){ NdbDictionary::Table copy(*table.m_dictTable); copy.setName(split[2].c_str()); - + if (copy.getTablespaceId() != RNIL) + { + Uint32 id = copy.getTablespaceId(); + debug << "Connecting " << name << " to tablespace oldid: " << id << flush; + NdbDictionary::Tablespace* ts = m_tablespaces[copy.getTablespaceId()]; + debug << " newid: " << ts->getObjectId() << endl; + copy.setTablespace(* ts); + } + if (dict->createTable(copy) == -1) { err << "Create table " << table.getTableName() << " failed: " @@ -713,3 +856,5 @@ 
BackupRestore::tuple(const TupleS & tup) template class Vector<NdbDictionary::Table*>; template class Vector<const NdbDictionary::Table*>; +template class Vector<NdbDictionary::Tablespace*>; +template class Vector<NdbDictionary::LogfileGroup*>; diff --git a/storage/ndb/tools/restore/consumer_restore.hpp b/storage/ndb/tools/restore/consumer_restore.hpp index 1bf6d89a912..385a792b4ca 100644 --- a/storage/ndb/tools/restore/consumer_restore.hpp +++ b/storage/ndb/tools/restore/consumer_restore.hpp @@ -39,6 +39,7 @@ public: m_logCount = m_dataCount = 0; m_restore = false; m_restore_meta = false; + m_no_restore_disk = false; m_parallelism = parallelism; m_callback = 0; m_free_callback = 0; @@ -49,6 +50,7 @@ public: virtual ~BackupRestore(); virtual bool init(); virtual void release(); + virtual bool object(Uint32 type, const void* ptr); virtual bool table(const TableS &); virtual bool endOfTables(); virtual void tuple(const TupleS &); @@ -66,6 +68,7 @@ public: Ndb_cluster_connection * m_cluster_connection; bool m_restore; bool m_restore_meta; + bool m_no_restore_disk; Uint32 m_logCount; Uint32 m_dataCount; @@ -88,6 +91,8 @@ public: const NdbDictionary::Table* get_table(const NdbDictionary::Table* ); Vector<const NdbDictionary::Table*> m_indexes; + Vector<NdbDictionary::Tablespace*> m_tablespaces; // Index by id + Vector<NdbDictionary::LogfileGroup*> m_logfilegroups;// Index by id }; #endif diff --git a/storage/ndb/tools/restore/restore_main.cpp b/storage/ndb/tools/restore/restore_main.cpp index af7c751fb67..ee934d6ccda 100644 --- a/storage/ndb/tools/restore/restore_main.cpp +++ b/storage/ndb/tools/restore/restore_main.cpp @@ -50,6 +50,7 @@ static int _print_data = 0; static int _print_log = 0; static int _restore_data = 0; static int _restore_meta = 0; +static int _no_restore_disk = 0; static struct my_option my_long_options[] = { @@ -71,6 +72,10 @@ static struct my_option my_long_options[] = "Restore meta data into NDB Cluster using NDBAPI", (gptr*) &_restore_meta, 
(gptr*) &_restore_meta, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0 }, + { "no-restore-disk-objects", 'd', + "Dont restore disk objects (tablespace/logfilegroups etc)", + (gptr*) &_no_restore_disk, (gptr*) &_no_restore_disk, 0, + GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0 }, { "parallelism", 'p', "No of parallel transactions during restore of data." "(parallelism can be 1 to 1024)", @@ -187,6 +192,11 @@ readArguments(int *pargc, char*** pargv) restore->m_restore_meta = true; } + if (_no_restore_disk) + { + restore->m_no_restore_disk = true; + } + { BackupConsumer * c = printer; g_consumers.push_back(c); @@ -303,6 +313,19 @@ main(int argc, char** argv) } + for(i = 0; i<metaData.getNoOfObjects(); i++) + { + for(Uint32 j= 0; j < g_consumers.size(); j++) + if (!g_consumers[j]->object(metaData.getObjType(i), + metaData.getObjPtr(i))) + { + ndbout_c("Restore: Failed to restore table: %s. " + "Exiting...", + metaData[i]->getTableName()); + exitHandler(NDBT_FAILED); + } + } + for(i = 0; i<metaData.getNoOfTables(); i++) { if (checkSysTable(metaData[i]->getTableName())) diff --git a/storage/ndb/tools/select_all.cpp b/storage/ndb/tools/select_all.cpp index b9d29db2082..fecb55cf734 100644 --- a/storage/ndb/tools/select_all.cpp +++ b/storage/ndb/tools/select_all.cpp @@ -45,6 +45,9 @@ static int _unqualified, _header, _parallelism, _useHexFormat, _lock, static int _tup = 0; static int _dumpDisk = 0; +static int use_rowid = 0; +static int nodata = 0; +static int use_gci = 0; static struct my_option my_long_options[] = { @@ -76,9 +79,18 @@ static struct my_option my_long_options[] = { "disk", 256, "Dump disk ref", (gptr*) &_dumpDisk, (gptr*) &_dumpDisk, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0 }, + { "rowid", 256, "Dump rowid", + (gptr*) &use_rowid, (gptr*) &use_rowid, 0, + GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0 }, + { "gci", 256, "Dump gci", + (gptr*) &use_gci, (gptr*) &use_gci, 0, + GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0 }, { "tupscan", 't', "Scan in tup order", (gptr*) &_tup, (gptr*) &_tup, 0, 
GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0 }, + { "nodata", 256, "Dont print data", + (gptr*) &nodata, (gptr*) &nodata, 0, + GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0 }, { 0, 0, 0, 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0} }; static void usage() @@ -222,12 +234,14 @@ int scanReadRecords(Ndb* pNdb, } int rs; + unsigned scan_flags = 0; + if (_tup) scan_flags |= NdbScanOperation::SF_TupScan; switch(_lock + (3 * order)){ case 1: - rs = pOp->readTuples(NdbScanOperation::LM_Read, 0, parallel); + rs = pOp->readTuples(NdbScanOperation::LM_Read, scan_flags, parallel); break; case 2: - rs = pOp->readTuples(NdbScanOperation::LM_Exclusive, 0, parallel); + rs = pOp->readTuples(NdbScanOperation::LM_Exclusive, scan_flags, parallel); break; case 3: rs = pIOp->readTuples(NdbScanOperation::LM_CommittedRead, 0, parallel, @@ -241,8 +255,7 @@ int scanReadRecords(Ndb* pNdb, break; case 0: default: - rs = pOp->readTuples(NdbScanOperation::LM_CommittedRead, - _tup ? NdbScanOperation::SF_TupScan : 0, parallel); + rs = pOp->readTuples(NdbScanOperation::LM_CommittedRead, scan_flags, parallel); break; } if( rs != 0 ){ @@ -308,19 +321,32 @@ int scanReadRecords(Ndb* pNdb, const NdbDictionary::Column* col = pTab->getColumn(a); if(col->getStorageType() == NdbDictionary::Column::StorageTypeDisk) disk= true; - - if((row->attributeStore(a) = pOp->getValue(col)) == 0) - { - ERR(pTrans->getNdbError()); - pNdb->closeTransaction(pTrans); - return -1; - } + + if (!nodata) + if((row->attributeStore(a) = pOp->getValue(col)) == 0) + { + ERR(pTrans->getNdbError()); + pNdb->closeTransaction(pTrans); + return -1; + } } NdbRecAttr * disk_ref= 0; if(_dumpDisk && disk) disk_ref = pOp->getValue(NdbDictionary::Column::DISK_REF); + NdbRecAttr * rowid= 0, *frag = 0, *gci = 0; + if (use_rowid) + { + frag = pOp->getValue(NdbDictionary::Column::FRAGMENT); + rowid = pOp->getValue(NdbDictionary::Column::ROWID); + } + + if (use_gci) + { + gci = pOp->getValue(NdbDictionary::Column::ROW_GCI); + } + check = 
pTrans->execute(NdbTransaction::NoCommit); if( check == -1 ) { const NdbError err = pTrans->getNdbError(); @@ -336,12 +362,18 @@ int scanReadRecords(Ndb* pNdb, return -1; } - if (headers) + if (rowid) + ndbout << "ROWID\t"; + + if (gci) + ndbout << "\tGCI"; + + if (headers && !nodata) row->header(ndbout); if (disk_ref) ndbout << "\tDISK_REF"; - + ndbout << endl; int eof; @@ -350,12 +382,28 @@ int scanReadRecords(Ndb* pNdb, while(eof == 0){ rows++; + + if (useHexFormat) + ndbout.setHexFormat(1); + + if (rowid) + { + ndbout << "[ fragment: " << frag->u_32_value() + << " m_page: " << rowid->u_32_value() + << " m_page_idx: " << *(Uint32*)(rowid->aRef() + 4) << " ]"; + ndbout << "\t"; + } - if (useHexFormat) { - ndbout.setHexFormat(1) << (*row); - } else { - ndbout << (*row); + if (gci) + { + if (gci->isNULL()) + ndbout << "NULL\t"; + else + ndbout << gci->u_64_value() << "\t"; } + + if (!nodata) + ndbout << (*row); if(disk_ref) { @@ -365,7 +413,8 @@ int scanReadRecords(Ndb* pNdb, << " m_page_idx: " << *(Uint16*)(disk_ref->aRef() + 4) << " ]"; } - ndbout << endl; + if (rowid || disk_ref || gci || !nodata) + ndbout << endl; eof = pOp->nextResult(); } if (eof == -1) { |